From e4d919188554a77c798a267e098059bc9aa39726 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:34 -0700
Subject: [PATCH] lockdep: locking init debugging improvement

Locking init improvement:

 - introduce and use __SPIN_LOCK_UNLOCKED for array initializations,
   to pass in the name string of locks, used by debugging

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 net/ipv4/tcp_ipv4.c      | 2 +-
 net/ipv4/tcp_minisocks.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8355b729fa9..823717285c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,7 +90,7 @@ static struct socket *tcp_socket;
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
-	.lhash_lock	= RW_LOCK_UNLOCKED,
+	.lhash_lock	= __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
 };
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e0851697ad5..0ccb7cb22b1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -40,7 +40,7 @@ int sysctl_tcp_abort_on_overflow;
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
 	.period		= TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
-	.death_lock	= SPIN_LOCK_UNLOCKED,
+	.death_lock	= __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
 	.hashinfo	= &tcp_hashinfo,
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&tcp_death_row),
-- 
cgit v1.2.3-70-g09d2


From 8a25d5debff2daee280e83e09d8c25d67c26a972 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:54 -0700
Subject: [PATCH] lockdep: prove spinlock rwlock locking correctness

Use the lock validator framework to prove spinlock and rwlock locking
correctness.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/spinlock.h       |  7 ++++
 include/linux/spinlock.h          | 63 ++++++++++++++++++++++---------
 include/linux/spinlock_api_smp.h  |  2 +
 include/linux/spinlock_api_up.h   |  1 +
 include/linux/spinlock_types.h    | 32 ++++++++++++++--
 include/linux/spinlock_types_up.h |  9 ++++-
 include/linux/spinlock_up.h       |  1 -
 kernel/Makefile                   |  1 +
 kernel/sched.c                    | 10 +++++
 kernel/spinlock.c                 | 79 ++++++++++++++++++++++++++++++++++-----
 lib/kernel_lock.c                 |  7 +++-
 lib/spinlock_debug.c              | 36 ++++++++++++++++++
 net/ipv4/route.c                  |  3 +-
 13 files changed, 217 insertions(+), 34 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 7e29b51bcaa..87c40f83065 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -68,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 		"=m" (lock->slock) : : "memory");
 }
 
+/*
+ * It is easier for the lock validator if interrupts are not re-enabled
+ * in the middle of a lock-acquire. This is a performance feature anyway
+ * so we turn it off:
+ */
+#ifndef CONFIG_PROVE_LOCKING
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	alternative_smp(
@@ -75,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 		__raw_spin_lock_string_up,
 		"=m" (lock->slock) : "r" (flags) : "memory");
 }
+#endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ae23beef9cc..31473db92d3 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 /*
  * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
  */
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 # include <asm/spinlock.h>
 #else
 # include <linux/spinlock_up.h>
 #endif
 
-#define spin_lock_init(lock)	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
-#define rwlock_init(lock)	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+#ifdef CONFIG_DEBUG_SPINLOCK
+  extern void __spin_lock_init(spinlock_t *lock, const char *name,
+			       struct lock_class_key *key);
+# define spin_lock_init(lock)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__spin_lock_init((lock), #lock, &__key);		\
+} while (0)
+
+#else
+# define spin_lock_init(lock)					\
+	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+  extern void __rwlock_init(rwlock_t *lock, const char *name,
+			    struct lock_class_key *key);
+# define rwlock_init(lock)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__rwlock_init((lock), #lock, &__key);			\
+} while (0)
+#else
+# define rwlock_init(lock)					\
+	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+#endif
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
 
@@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
  extern int _raw_spin_trylock(spinlock_t *lock);
  extern void _raw_spin_unlock(spinlock_t *lock);
-
  extern void _raw_read_lock(rwlock_t *lock);
  extern int _raw_read_trylock(rwlock_t *lock);
  extern void _raw_read_unlock(rwlock_t *lock);
@@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
  extern int _raw_write_trylock(rwlock_t *lock);
  extern void _raw_write_unlock(rwlock_t *lock);
 #else
-# define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
-# define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
 # define _raw_spin_lock(lock)		__raw_spin_lock(&(lock)->raw_lock)
 # define _raw_spin_lock_flags(lock, flags) \
 		__raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
+# define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
 # define _raw_read_lock(rwlock)		__raw_read_lock(&(rwlock)->raw_lock)
-# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
-# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
-# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
 # define _raw_read_trylock(rwlock)	__raw_read_trylock(&(rwlock)->raw_lock)
+# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
+# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
 # define _raw_write_trylock(rwlock)	__raw_write_trylock(&(rwlock)->raw_lock)
+# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
 #endif
 
 #define read_can_lock(rwlock)		__raw_read_can_lock(&(rwlock)->raw_lock)
@@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 #define write_trylock(lock)		__cond_lock(_write_trylock(lock))
 
 #define spin_lock(lock)			_spin_lock(lock)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
+#else
+# define spin_lock_nested(lock, subclass) _spin_lock(lock)
+#endif
+
 #define write_lock(lock)		_write_lock(lock)
 #define read_lock(lock)			_read_lock(lock)
 
@@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 /*
  * We inline the unlock functions in the nondebug case:
  */
-#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
+	!defined(CONFIG_SMP)
 # define spin_unlock(lock)		_spin_unlock(lock)
 # define read_unlock(lock)		_read_unlock(lock)
 # define write_unlock(lock)		_write_unlock(lock)
-#else
-# define spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
-# define read_unlock(lock)		__raw_read_unlock(&(lock)->raw_lock)
-# define write_unlock(lock)		__raw_write_unlock(&(lock)->raw_lock)
-#endif
-
-#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
 # define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
 # define read_unlock_irq(lock)		_read_unlock_irq(lock)
 # define write_unlock_irq(lock)		_write_unlock_irq(lock)
 #else
+# define spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
+# define read_unlock(lock)		__raw_read_unlock(&(lock)->raw_lock)
+# define write_unlock(lock)		__raw_write_unlock(&(lock)->raw_lock)
 # define spin_unlock_irq(lock) \
     do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
 # define read_unlock_irq(lock) \
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 78e6989ffb5..b2c4f829946 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr);
 #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
 
 void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(spinlock_t);
+void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+							__acquires(spinlock_t);
 void __lockfunc _read_lock(rwlock_t *lock)		__acquires(rwlock_t);
 void __lockfunc _write_lock(rwlock_t *lock)		__acquires(rwlock_t);
 void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(spinlock_t);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index cd81cee566f..67faa044c5f 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -49,6 +49,7 @@
   do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
 
 #define _spin_lock(lock)			__LOCK(lock)
+#define _spin_lock_nested(lock, subclass)	__LOCK(lock)
 #define _read_lock(lock)			__LOCK(lock)
 #define _write_lock(lock)			__LOCK(lock)
 #define _spin_lock_bh(lock)			__LOCK_BH(lock)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index f5d4ed7bc78..dc5fb69e4de 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,6 +9,8 @@
  * Released under the General Public License (GPL).
  */
 
+#include <linux/lockdep.h>
+
 #if defined(CONFIG_SMP)
 # include <asm/spinlock_types.h>
 #else
@@ -24,6 +26,9 @@ typedef struct {
 	unsigned int magic, owner_cpu;
 	void *owner;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC		0xdead4ead
@@ -37,28 +42,47 @@ typedef struct {
 	unsigned int magic, owner_cpu;
 	void *owner;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } rwlock_t;
 
 #define RWLOCK_MAGIC		0xdeaf1eed
 
 #define SPINLOCK_OWNER_INIT	((void *)-1L)
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define SPIN_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
+#else
+# define SPIN_DEP_MAP_INIT(lockname)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 # define __SPIN_LOCK_UNLOCKED(lockname)					\
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
 				.magic = SPINLOCK_MAGIC,		\
 				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1 }
+				.owner_cpu = -1,			\
+				SPIN_DEP_MAP_INIT(lockname) }
 #define __RW_LOCK_UNLOCKED(lockname)					\
 	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
 				.magic = RWLOCK_MAGIC,			\
 				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1 }
+				.owner_cpu = -1,			\
+				RW_DEP_MAP_INIT(lockname) }
 #else
 # define __SPIN_LOCK_UNLOCKED(lockname) \
-	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
+	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
+				SPIN_DEP_MAP_INIT(lockname) }
 #define __RW_LOCK_UNLOCKED(lockname) \
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED }
+	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+				RW_DEP_MAP_INIT(lockname) }
 #endif
 
 #define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 04135b0e198..27644af20b7 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -12,10 +12,14 @@
  * Released under the General Public License (GPL).
  */
 
-#ifdef CONFIG_DEBUG_SPINLOCK
+#if defined(CONFIG_DEBUG_SPINLOCK) || \
+	defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 typedef struct {
 	volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { 1 }
@@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t;
 
 typedef struct {
 	/* no debug version on UP */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED { }
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 31accf2f0b1..ea54c4c9a4e 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -18,7 +18,6 @@
  */
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-
 #define __raw_spin_is_locked(x)		((x)->slock == 0)
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
diff --git a/kernel/Makefile b/kernel/Makefile
index df6ef326369..47dbcd570cd 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/sched.c b/kernel/sched.c
index 91182996653..ae4db0185bb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -308,6 +308,13 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
 #endif
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
 	spin_unlock_irq(&rq->lock);
 }
 
@@ -1778,6 +1785,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
 		WARN_ON(rq->prev_mm);
 		rq->prev_mm = oldmm;
 	}
+	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);
@@ -4384,6 +4392,7 @@ asmlinkage long sys_sched_yield(void)
 	 * no need to preempt or enable interrupts:
 	 */
 	__release(rq->lock);
+	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();
 
@@ -4447,6 +4456,7 @@ int cond_resched_lock(spinlock_t *lock)
 		spin_lock(lock);
 	}
 	if (need_resched() && __resched_legal()) {
+		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
 		__cond_resched();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index b31e54eadf5..bfd6ad9c033 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -13,6 +13,7 @@
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/debug_locks.h>
 #include <linux/module.h>
 
 /*
@@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_spin_trylock(lock))
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 	
 	preempt_enable();
 	return 0;
@@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_read_trylock(lock))
+	if (_raw_read_trylock(lock)) {
+		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable();
 	return 0;
@@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_write_trylock(lock))
+	if (_raw_write_trylock(lock)) {
+		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable();
 	return 0;
 }
 EXPORT_SYMBOL(_write_trylock);
 
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+/*
+ * If lockdep is enabled then we use the non-preemption spin-ops
+ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * not re-enabled during lock-acquire (which the preempt-spin-ops do):
+ */
+#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
+	defined(CONFIG_PROVE_LOCKING)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock);
@@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	/*
+	 * On lockdep we dont want the hand-coded irq-enable of
+	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * that interrupts are not re-enabled during lock-acquire:
+	 */
+#ifdef CONFIG_PROVE_LOCKING
+	_raw_spin_lock(lock);
+#else
 	_raw_spin_lock_flags(lock, &flags);
+#endif
 	return flags;
 }
 EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irq);
@@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock_bh);
@@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 	return flags;
 }
@@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock_irq);
@@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock_bh);
@@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 	return flags;
 }
@@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock_irq);
@@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock_bh);
@@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 
@@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 
@@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
 
 #endif /* CONFIG_PREEMPT */
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+{
+	preempt_disable();
+	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	_raw_spin_lock(lock);
+}
+
+EXPORT_SYMBOL(_spin_lock_nested);
+
+#endif
+
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	preempt_enable();
 }
@@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
 
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	preempt_enable();
 }
@@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
 
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	preempt_enable();
 }
@@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
 
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
 
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
 
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
 
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
 
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
 
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_read_unlock_bh);
 
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
 
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
 
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_write_unlock_bh);
 
@@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
-	if (_raw_spin_trylock(lock))
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 	return 0;
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index e713e86811a..e0fdfddb406 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -177,7 +177,12 @@ static inline void __lock_kernel(void)
 
 static inline void __unlock_kernel(void)
 {
-	spin_unlock(&kernel_flag);
+	/*
+	 * the BKL is not covered by lockdep, so we open-code the
+	 * unlocking sequence (and thus avoid the dep-chain ops):
+	 */
+	_raw_spin_unlock(&kernel_flag);
+	preempt_enable();
 }
 
 /*
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 3de2ccf48ac..3d9c4dc965e 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -12,6 +12,42 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 
+void __spin_lock_init(spinlock_t *lock, const char *name,
+		      struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key);
+#endif
+	lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	lock->magic = SPINLOCK_MAGIC;
+	lock->owner = SPINLOCK_OWNER_INIT;
+	lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__spin_lock_init);
+
+void __rwlock_init(rwlock_t *lock, const char *name,
+		   struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key);
+#endif
+	lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
+	lock->magic = RWLOCK_MAGIC;
+	lock->owner = SPINLOCK_OWNER_INIT;
+	lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__rwlock_init);
+
 static void spin_bug(spinlock_t *lock, const char *msg)
 {
 	struct task_struct *owner = NULL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index da44fabf4dc..283a72247e5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -205,7 +205,8 @@ __u8 ip_tos2prio[16] = {
 struct rt_hash_bucket {
 	struct rtable	*chain;
 };
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
+	defined(CONFIG_PROVE_LOCKING)
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
-- 
cgit v1.2.3-70-g09d2


From 6205120044bb75ca06019491d1aa0e727fdd35be Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:59 -0700
Subject: [PATCH] lockdep: fix RT_HASH_LOCK_SZ

On lockdep we have a quite big spinlock_t, so keep the size down.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 net/ipv4/route.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 283a72247e5..2dc6dbb2846 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -210,17 +210,22 @@ struct rt_hash_bucket {
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
+ * (on lockdep we have a quite big spinlock_t, so keep the size down there)
  */
-#if NR_CPUS >= 32
-#define RT_HASH_LOCK_SZ	4096
-#elif NR_CPUS >= 16
-#define RT_HASH_LOCK_SZ	2048
-#elif NR_CPUS >= 8
-#define RT_HASH_LOCK_SZ	1024
-#elif NR_CPUS >= 4
-#define RT_HASH_LOCK_SZ	512
+#ifdef CONFIG_LOCKDEP
+# define RT_HASH_LOCK_SZ	256
 #else
-#define RT_HASH_LOCK_SZ	256
+# if NR_CPUS >= 32
+#  define RT_HASH_LOCK_SZ	4096
+# elif NR_CPUS >= 16
+#  define RT_HASH_LOCK_SZ	2048
+# elif NR_CPUS >= 8
+#  define RT_HASH_LOCK_SZ	1024
+# elif NR_CPUS >= 4
+#  define RT_HASH_LOCK_SZ	512
+# else
+#  define RT_HASH_LOCK_SZ	256
+# endif
 #endif
 
 static spinlock_t	*rt_hash_locks;
-- 
cgit v1.2.3-70-g09d2


From c63661848581a9842dfc72d9a400285dd284fc47 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:13 -0700
Subject: [PATCH] lockdep: annotate bh_lock_sock()

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/net/sock.h  | 3 +++
 net/ipv4/tcp_ipv4.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/include/net/sock.h b/include/net/sock.h
index 83805feea88..0969fb60d6e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -751,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk));
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock_nested(__sk) \
+				spin_lock_nested(&((__sk)->sk_lock.slock), \
+				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
 extern struct sock		*sk_alloc(int family,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 823717285c6..5a886e6efbb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1090,7 +1090,7 @@ process:
 
 	skb->dev = NULL;
 
-	bh_lock_sock(sk);
+	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 #ifdef CONFIG_NET_DMA
-- 
cgit v1.2.3-70-g09d2


From bbcf467dab42ea3c85f368df346c82af2fbba665 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 3 Jul 2006 19:38:35 -0700
Subject: [NET]: Verify gso_type too in gso_segment

We don't want nasty Xen guests to pass a TCPv6 packet in with gso_type set
to TCPv4 or even UDP (or a packet that's both TCP and UDP).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c       | 14 +++++++++++---
 net/ipv4/tcp.c           | 13 ++++++++++++-
 net/ipv6/ipv6_sockglue.c |  8 ++++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8d157157bf8..318d4674faa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1106,7 +1106,15 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	int ihl;
 	int id;
 
-	if (!pskb_may_pull(skb, sizeof(*iph)))
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_TCPV4 |
+		       SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       0)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
 	iph = skb->nh.iph;
@@ -1114,7 +1122,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (ihl < sizeof(*iph))
 		goto out;
 
-	if (!pskb_may_pull(skb, ihl))
+	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
 	skb->h.raw = __skb_pull(skb, ihl);
@@ -1125,7 +1133,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_protos[proto]);
-	if (ops && ops->gso_segment)
+	if (likely(ops && ops->gso_segment))
 		segs = ops->gso_segment(skb, features);
 	rcu_read_unlock();
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 804458712d8..f6a2d9223d0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2170,8 +2170,19 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
 		/* Packet is from an untrusted source, reset gso_segs. */
-		int mss = skb_shinfo(skb)->gso_size;
+		int type = skb_shinfo(skb)->gso_type;
+		int mss;
+
+		if (unlikely(type &
+			     ~(SKB_GSO_TCPV4 |
+			       SKB_GSO_DODGY |
+			       SKB_GSO_TCP_ECN |
+			       SKB_GSO_TCPV6 |
+			       0) ||
+			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
+			goto out;
 
+		mss = skb_shinfo(skb)->gso_size;
 		skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
 
 		segs = NULL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c28e5c28744..ec59344478d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -64,6 +64,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	struct inet6_protocol *ops;
 	int proto;
 
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_TCPV6 |
+		       0)))
+		goto out;
+
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-- 
cgit v1.2.3-70-g09d2


From 89114afd435a486deb8583e89f490fc274444d18 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 8 Jul 2006 13:34:32 -0700
Subject: [NET] gso: Add skb_is_gso

This patch adds the wrapper function skb_is_gso which can be used instead
of directly testing skb_shinfo(skb)->gso_size.  This makes things a little
nicer and allows us to change the primary key for indicating whether an skb
is GSO (if we ever want to do that).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c              | 2 +-
 drivers/net/chelsio/sge.c       | 2 +-
 drivers/net/e1000/e1000_main.c  | 7 +++----
 drivers/net/forcedeth.c         | 2 +-
 drivers/net/ixgb/ixgb_main.c    | 2 +-
 drivers/net/loopback.c          | 2 +-
 drivers/net/myri10ge/myri10ge.c | 2 +-
 drivers/net/sky2.c              | 2 +-
 drivers/net/typhoon.c           | 4 ++--
 drivers/s390/net/qeth_main.c    | 2 +-
 include/linux/netdevice.h       | 2 +-
 include/linux/skbuff.h          | 5 +++++
 net/bridge/br_forward.c         | 2 +-
 net/bridge/br_netfilter.c       | 2 +-
 net/ipv4/ip_output.c            | 4 ++--
 net/ipv4/xfrm4_output.c         | 2 +-
 net/ipv6/ip6_output.c           | 4 ++--
 net/ipv6/xfrm6_output.c         | 2 +-
 18 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'net/ipv4')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 64b6a72b4f6..db73de0d251 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1639,7 +1639,7 @@ bnx2_tx_int(struct bnx2 *bp)
 		skb = tx_buf->skb;
 #ifdef BCM_TSO 
 		/* partial BD completions possible with TSO packets */
-		if (skb_shinfo(skb)->gso_size) {
+		if (skb_is_gso(skb)) {
 			u16 last_idx, last_ring_idx;
 
 			last_idx = sw_cons +
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 87f94d939ff..61b3754f50f 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1417,7 +1417,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct cpl_tx_pkt *cpl;
 
 #ifdef NETIF_F_TSO
-	if (skb_shinfo(skb)->gso_size) {
+	if (skb_is_gso(skb)) {
 		int eth_type;
 		struct cpl_tx_pkt_lso *hdr;
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index f77624f5f17..1f34d06d01b 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2394,7 +2394,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 	uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
 	int err;
 
-	if (skb_shinfo(skb)->gso_size) {
+	if (skb_is_gso(skb)) {
 		if (skb_header_cloned(skb)) {
 			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 			if (err)
@@ -2519,7 +2519,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		 * tso gets written back prematurely before the data is fully
 		 * DMA'd to the controller */
 		if (!skb->data_len && tx_ring->last_tx_tso &&
-		    !skb_shinfo(skb)->gso_size) {
+		    !skb_is_gso(skb)) {
 			tx_ring->last_tx_tso = 0;
 			size -= 4;
 		}
@@ -2806,8 +2806,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 #ifdef NETIF_F_TSO
 	/* Controller Erratum workaround */
-	if (!skb->data_len && tx_ring->last_tx_tso &&
-	    !skb_shinfo(skb)->gso_size)
+	if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
 		count++;
 #endif
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 037d870712f..ad81ec68f88 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1495,7 +1495,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	np->tx_skbuff[nr] = skb;
 
 #ifdef NETIF_F_TSO
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_is_gso(skb))
 		tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
 	else
 #endif
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index b91e082483f..7eb08d92913 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1173,7 +1173,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 	uint16_t ipcse, tucse, mss;
 	int err;
 
-	if(likely(skb_shinfo(skb)->gso_size)) {
+	if (likely(skb_is_gso(skb))) {
 		if (skb_header_cloned(skb)) {
 			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 			if (err)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 43fef7de8cb..997cbce9af6 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif
 
 #ifdef LOOPBACK_TSO
-	if (skb_shinfo(skb)->gso_size) {
+	if (skb_is_gso(skb)) {
 		BUG_ON(skb->protocol != htons(ETH_P_IP));
 		BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
 
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f4c8fd373b9..ee1de971a71 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2116,7 +2116,7 @@ abort_linearize:
 		}
 		idx = (idx + 1) & tx->mask;
 	} while (idx != last_idx);
-	if (skb_shinfo(skb)->gso_size) {
+	if (skb_is_gso(skb)) {
 		printk(KERN_ERR
 		       "myri10ge: %s: TSO but wanted to linearize?!?!?\n",
 		       mgp->dev->name);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 418f169a6a3..31093760aa1 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1159,7 +1159,7 @@ static unsigned tx_le_req(const struct sk_buff *skb)
 	count = sizeof(dma_addr_t) / sizeof(u32);
 	count += skb_shinfo(skb)->nr_frags * count;
 
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_is_gso(skb))
 		++count;
 
 	if (skb->ip_summed == CHECKSUM_HW)
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 063816f2b11..4103c37172f 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
 	 * If problems develop with TSO, check this first.
 	 */
 	numDesc = skb_shinfo(skb)->nr_frags + 1;
-	if(skb_tso_size(skb))
+	if (skb_is_gso(skb))
 		numDesc++;
 
 	/* When checking for free space in the ring, we need to also
@@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
 				TYPHOON_TX_PF_VLAN_TAG_SHIFT);
 	}
 
-	if(skb_tso_size(skb)) {
+	if (skb_is_gso(skb)) {
 		first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT;
 		first_txd->numDesc++;
 
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 8e8963f1573..329e12c1fae 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -4457,7 +4457,7 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb)
 	queue = card->qdio.out_qs
 		[qeth_get_priority_queue(card, skb, ipv, cast_type)];
 
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_is_gso(skb))
 		large_send = card->options.large_send;
 
 	/*are we able to do TSO ? If so ,prepare and send it from here */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 85f99f60dee..0359a643001 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1001,7 +1001,7 @@ static inline int net_gso_ok(int features, int gso_type)
 
 static inline int skb_gso_ok(struct sk_buff *skb, int features)
 {
-	return net_gso_ok(features, skb_shinfo(skb)->gso_size ?
+	return net_gso_ok(features, skb_is_gso(skb) ?
 				    skb_shinfo(skb)->gso_type : 0);
 }
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3597b4f1438..0bf31b83578 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1455,5 +1455,10 @@ static inline void skb_init_secmark(struct sk_buff *skb)
 { }
 #endif
 
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_size;
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 8be9f2123e5..6ccd32b3080 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -35,7 +35,7 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
 	/* drop mtu oversized packets except gso */
-	if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
+	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8298a5179ae..cbc8a389a0a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -761,7 +761,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	if (skb->protocol == htons(ETH_P_IP) &&
 	    skb->len > skb->dev->mtu &&
-	    !skb_shinfo(skb)->gso_size)
+	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
 		return br_dev_queue_push_xmit(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ca0e714613f..7c9f9a6421b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -209,7 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
 		return dst_output(skb);
 	}
 #endif
-	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
+	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
 		return ip_fragment(skb, ip_finish_output2);
 	else
 		return ip_finish_output2(skb);
@@ -1095,7 +1095,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 	while (size > 0) {
 		int i;
 
-		if (skb_shinfo(skb)->gso_size)
+		if (skb_is_gso(skb))
 			len = size;
 		else {
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 193363e2293..d16f863cf68 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -134,7 +134,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
 	}
 #endif
 
-	if (!skb_shinfo(skb)->gso_size)
+	if (!skb_is_gso(skb))
 		return xfrm4_output_finish2(skb);
 
 	skb->protocol = htons(ETH_P_IP);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2c5b44575af..3bc74ce7880 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb)
 
 int ip6_output(struct sk_buff *skb)
 {
-	if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
+	if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
 				dst_allfrag(skb->dst))
 		return ip6_fragment(skb, ip6_output2);
 	else
@@ -229,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb->priority = sk->sk_priority;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || ipfragok || skb_shinfo(skb)->gso_size) {
+	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
 				dst_output);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 48fccb1eca0..0eea60ea9eb 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -122,7 +122,7 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 {
 	struct sk_buff *segs;
 
-	if (!skb_shinfo(skb)->gso_size)
+	if (!skb_is_gso(skb))
 		return xfrm6_output_finish2(skb);
 
 	skb->protocol = htons(ETH_P_IP);
-- 
cgit v1.2.3-70-g09d2


From a430a43d087545c96542ee64573237919109d370 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 8 Jul 2006 13:34:56 -0700
Subject: [NET] gso: Fix up GSO packets with broken checksums

Certain subsystems in the stack (e.g., netfilter) can break the partial
checksum on GSO packets.  Until they're fixed, this patch allows this to
work by recomputing the partial checksums through the GSO mechanism.

Once they've all been converted to update the partial checksum instead of
clearing it, this workaround can be removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  8 +++--
 include/net/protocol.h    |  2 ++
 include/net/tcp.h         |  1 +
 net/core/dev.c            | 36 ++++++++++++++++---
 net/ipv4/af_inet.c        | 36 +++++++++++++++++++
 net/ipv4/tcp_ipv4.c       | 18 ++++++++++
 net/ipv6/ipv6_sockglue.c  | 89 ++++++++++++++++++++++++++++++++---------------
 net/ipv6/tcp_ipv6.c       | 19 ++++++++++
 8 files changed, 174 insertions(+), 35 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0359a643001..76cc099c858 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -549,6 +549,7 @@ struct packet_type {
 					 struct net_device *);
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						int features);
+	int			(*gso_send_check)(struct sk_buff *skb);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
@@ -1001,13 +1002,14 @@ static inline int net_gso_ok(int features, int gso_type)
 
 static inline int skb_gso_ok(struct sk_buff *skb, int features)
 {
-	return net_gso_ok(features, skb_is_gso(skb) ?
-				    skb_shinfo(skb)->gso_type : 0);
+	return net_gso_ok(features, skb_shinfo(skb)->gso_type);
 }
 
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
-	return !skb_gso_ok(skb, dev->features);
+	return skb_is_gso(skb) &&
+	       (!skb_gso_ok(skb, dev->features) ||
+		unlikely(skb->ip_summed != CHECKSUM_HW));
 }
 
 #endif /* __KERNEL__ */
diff --git a/include/net/protocol.h b/include/net/protocol.h
index a225d6371cb..c643bce64e5 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -36,6 +36,7 @@
 struct net_protocol {
 	int			(*handler)(struct sk_buff *skb);
 	void			(*err_handler)(struct sk_buff *skb, u32 info);
+	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
 					       int features);
 	int			no_policy;
@@ -51,6 +52,7 @@ struct inet6_protocol
 			       int type, int code, int offset,
 			       __u32 info);
 
+	int	(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff *(*gso_segment)(struct sk_buff *skb,
 				       int features);
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3cd803b0d7a..0720bddff1e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1086,6 +1086,7 @@ extern struct request_sock_ops tcp_request_sock_ops;
 
 extern int tcp_v4_destroy_sock(struct sock *sk);
 
+extern int tcp_v4_gso_send_check(struct sk_buff *skb);
 extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/core/dev.c b/net/core/dev.c
index 0096349ee38..4d2b5167d7f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1162,9 +1162,17 @@ int skb_checksum_help(struct sk_buff *skb, int inward)
 	unsigned int csum;
 	int ret = 0, offset = skb->h.raw - skb->data;
 
-	if (inward) {
-		skb->ip_summed = CHECKSUM_NONE;
-		goto out;
+	if (inward)
+		goto out_set_summed;
+
+	if (unlikely(skb_shinfo(skb)->gso_size)) {
+		static int warned;
+
+		WARN_ON(!warned);
+		warned = 1;
+
+		/* Let GSO fix up the checksum. */
+		goto out_set_summed;
 	}
 
 	if (skb_cloned(skb)) {
@@ -1181,6 +1189,8 @@ int skb_checksum_help(struct sk_buff *skb, int inward)
 	BUG_ON(skb->csum + 2 > offset);
 
 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
+
+out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:	
 	return ret;
@@ -1201,17 +1211,35 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
 	int type = skb->protocol;
+	int err;
 
 	BUG_ON(skb_shinfo(skb)->frag_list);
-	BUG_ON(skb->ip_summed != CHECKSUM_HW);
 
 	skb->mac.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
+	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+		static int warned;
+
+		WARN_ON(!warned);
+		warned = 1;
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+				err = ptype->gso_send_check(skb);
+				segs = ERR_PTR(err);
+				if (err || skb_gso_ok(skb, features))
+					break;
+				__skb_push(skb, skb->data - skb->nh.raw);
+			}
 			segs = ptype->gso_segment(skb, features);
 			break;
 		}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 318d4674faa..c84a32070f8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1097,6 +1097,40 @@ int inet_sk_rebuild_header(struct sock *sk)
 
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
+static int inet_gso_send_check(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct net_protocol *ops;
+	int proto;
+	int ihl;
+	int err = -EINVAL;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+		goto out;
+
+	iph = skb->nh.iph;
+	ihl = iph->ihl * 4;
+	if (ihl < sizeof(*iph))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, ihl)))
+		goto out;
+
+	skb->h.raw = __skb_pull(skb, ihl);
+	iph = skb->nh.iph;
+	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	err = -EPROTONOSUPPORT;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_protos[proto]);
+	if (likely(ops && ops->gso_send_check))
+		err = ops->gso_send_check(skb);
+	rcu_read_unlock();
+
+out:
+	return err;
+}
+
 static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
@@ -1162,6 +1196,7 @@ static struct net_protocol igmp_protocol = {
 static struct net_protocol tcp_protocol = {
 	.handler =	tcp_v4_rcv,
 	.err_handler =	tcp_v4_err,
+	.gso_send_check = tcp_v4_gso_send_check,
 	.gso_segment =	tcp_tso_segment,
 	.no_policy =	1,
 };
@@ -1208,6 +1243,7 @@ static int ipv4_proc_init(void);
 static struct packet_type ip_packet_type = {
 	.type = __constant_htons(ETH_P_IP),
 	.func = ip_rcv,
+	.gso_send_check = inet_gso_send_check,
 	.gso_segment = inet_gso_segment,
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a886e6efbb..a891133f00e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -496,6 +496,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	}
 }
 
+int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		return -EINVAL;
+
+	iph = skb->nh.iph;
+	th = skb->h.th;
+
+	th->check = 0;
+	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
+	skb->csum = offsetof(struct tcphdr, check);
+	skb->ip_summed = CHECKSUM_HW;
+	return 0;
+}
+
 /*
  *	This routine will send an RST to the other tcp.
  *
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0c17dec11c8..43327264e69 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -57,29 +57,11 @@
 
 DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb,
+						    int proto)
 {
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct ipv6hdr *ipv6h;
-	struct inet6_protocol *ops;
-	int proto;
+	struct inet6_protocol *ops = NULL;
 
-	if (unlikely(skb_shinfo(skb)->gso_type &
-		     ~(SKB_GSO_UDP |
-		       SKB_GSO_DODGY |
-		       SKB_GSO_TCP_ECN |
-		       SKB_GSO_TCPV6 |
-		       0)))
-		goto out;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
-		goto out;
-
-	ipv6h = skb->nh.ipv6h;
-	proto = ipv6h->nexthdr;
-	__skb_pull(skb, sizeof(*ipv6h));
-
-	rcu_read_lock();
 	for (;;) {
 		struct ipv6_opt_hdr *opth;
 		int len;
@@ -88,30 +70,80 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 			ops = rcu_dereference(inet6_protos[proto]);
 
 			if (unlikely(!ops))
-				goto unlock;
+				break;
 
 			if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
 				break;
 		}
 
 		if (unlikely(!pskb_may_pull(skb, 8)))
-			goto unlock;
+			break;
 
 		opth = (void *)skb->data;
 		len = opth->hdrlen * 8 + 8;
 
 		if (unlikely(!pskb_may_pull(skb, len)))
-			goto unlock;
+			break;
 
 		proto = opth->nexthdr;
 		__skb_pull(skb, len);
 	}
 
-	skb->h.raw = skb->data;
-	if (likely(ops->gso_segment))
-		segs = ops->gso_segment(skb, features);
+	return ops;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	struct inet6_protocol *ops;
+	int err = -EINVAL;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+		goto out;
 
-unlock:
+	ipv6h = skb->nh.ipv6h;
+	__skb_pull(skb, sizeof(*ipv6h));
+	err = -EPROTONOSUPPORT;
+
+	rcu_read_lock();
+	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+	if (likely(ops && ops->gso_send_check)) {
+		skb->h.raw = skb->data;
+		err = ops->gso_send_check(skb);
+	}
+	rcu_read_unlock();
+
+out:
+	return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct ipv6hdr *ipv6h;
+	struct inet6_protocol *ops;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_TCPV6 |
+		       0)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+		goto out;
+
+	ipv6h = skb->nh.ipv6h;
+	__skb_pull(skb, sizeof(*ipv6h));
+	segs = ERR_PTR(-EPROTONOSUPPORT);
+
+	rcu_read_lock();
+	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+	if (likely(ops && ops->gso_segment)) {
+		skb->h.raw = skb->data;
+		segs = ops->gso_segment(skb, features);
+	}
 	rcu_read_unlock();
 
 	if (unlikely(IS_ERR(segs)))
@@ -130,6 +162,7 @@ out:
 static struct packet_type ipv6_packet_type = {
 	.type = __constant_htons(ETH_P_IPV6), 
 	.func = ipv6_rcv,
+	.gso_send_check = ipv6_gso_send_check,
 	.gso_segment = ipv6_gso_segment,
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5bdcb9002cf..923989d0520 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -552,6 +552,24 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	}
 }
 
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		return -EINVAL;
+
+	ipv6h = skb->nh.ipv6h;
+	th = skb->h.th;
+
+	th->check = 0;
+	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+				     IPPROTO_TCP, 0);
+	skb->csum = offsetof(struct tcphdr, check);
+	skb->ip_summed = CHECKSUM_HW;
+	return 0;
+}
 
 static void tcp_v6_send_reset(struct sk_buff *skb)
 {
@@ -1603,6 +1621,7 @@ struct proto tcpv6_prot = {
 static struct inet6_protocol tcpv6_protocol = {
 	.handler	=	tcp_v6_rcv,
 	.err_handler	=	tcp_v6_err,
+	.gso_send_check	=	tcp_v6_gso_send_check,
 	.gso_segment	=	tcp_tso_segment,
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
-- 
cgit v1.2.3-70-g09d2


From 26e0fd1ce2418b10713b569a195bdb679233066b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 8 Jul 2006 13:38:55 -0700
Subject: [NET]: Fix IPv4/DECnet routing rule dumping

When more rules are present than fit in a single skb, the remaining
rules are incorrectly skipped.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 3 ++-
 net/ipv4/fib_rules.c  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 06e785fe575..22f321d9bf9 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -399,9 +399,10 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
 		if (idx < s_idx)
-			continue;
+			goto next;
 		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
+next:
 		idx++;
 	}
 	rcu_read_unlock();
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6c642d11d4c..773b12ba4e3 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -457,13 +457,13 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-
 		if (idx < s_idx)
-			continue;
+			goto next;
 		if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq,
 				   RTM_NEWRULE, NLM_F_MULTI) < 0)
 			break;
+next:
 		idx++;
 	}
 	rcu_read_unlock();
-- 
cgit v1.2.3-70-g09d2


From 7466d90f858ad89dda6f47b744e0a839937907a3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 9 Jul 2006 18:18:00 -0700
Subject: [IPV4] inetpeer: Get rid of volatile from peer_total

The variable peer_total is protected by a lock.  The volatile marker
makes no sense.  This shaves off 20 bytes on i386.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2160874ce7a..03ff62ebcfe 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty;
 static DEFINE_RWLOCK(peer_pool_lock);
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
-static volatile int peer_total;
+static int peer_total;
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold = 65536 + 128;	/* start to throw entries more
 					 * aggressively at this stage */
-- 
cgit v1.2.3-70-g09d2


From c427d27452b41378e305af80db5757da048dd38e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 10 Jul 2006 14:16:32 -0700
Subject: [TCP]: Remove TCP Compound

This reverts: f890f921040fef6a35e39d15b729af1fd1a35f29

The inclusion of TCP Compound needs to be reverted at this time
because it is not 100% certain that this code conforms to the
requirements of Developer's Certificate of Origin 1.1 paragraph (b).

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig        |  10 --
 net/ipv4/Makefile       |   1 -
 net/ipv4/tcp_compound.c | 448 ------------------------------------------------
 3 files changed, 459 deletions(-)
 delete mode 100644 net/ipv4/tcp_compound.c

(limited to 'net/ipv4')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index da33393be45..8514106761b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -572,16 +572,6 @@ config TCP_CONG_VENO
 	loss packets.
 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 
-config TCP_CONG_COMPOUND
-	tristate "TCP Compound"
-	depends on EXPERIMENTAL
-	default n
-	---help---
-	TCP Compound is a sender-side only change to TCP that uses
-	a mixed Reno/Vegas approach to calculate the cwnd.
-	For further details look here:
-	  ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
-
 endmenu
 
 config TCP_CONG_BIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 38b8039bdd5..4878fc5be85 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,7 +47,6 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
-obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		      xfrm4_output.o
diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
deleted file mode 100644
index bc54f7e9aea..00000000000
--- a/net/ipv4/tcp_compound.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * TCP Vegas congestion control
- *
- * This is based on the congestion detection/avoidance scheme described in
- *    Lawrence S. Brakmo and Larry L. Peterson.
- *    "TCP Vegas: End to end congestion avoidance on a global internet."
- *    IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
- *    October 1995. Available from:
- *	ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
- *
- * See http://www.cs.arizona.edu/xkernel/ for their implementation.
- * The main aspects that distinguish this implementation from the
- * Arizona Vegas implementation are:
- *   o We do not change the loss detection or recovery mechanisms of
- *     Linux in any way. Linux already recovers from losses quite well,
- *     using fine-grained timers, NewReno, and FACK.
- *   o To avoid the performance penalty imposed by increasing cwnd
- *     only every-other RTT during slow start, we increase during
- *     every RTT during slow start, just like Reno.
- *   o Largely to allow continuous cwnd growth during slow start,
- *     we use the rate at which ACKs come back as the "actual"
- *     rate, rather than the rate at which data is sent.
- *   o To speed convergence to the right rate, we set the cwnd
- *     to achieve the right ("actual") rate when we exit slow start.
- *   o To filter out the noise caused by delayed ACKs, we use the
- *     minimum RTT sample observed during the last RTT to calculate
- *     the actual rate.
- *   o When the sender re-starts from idle, it waits until it has
- *     received ACKs for an entire flight of new data before making
- *     a cwnd adjustment decision. The original Vegas implementation
- *     assumed senders never went idle.
- *
- *
- *   TCP Compound based on TCP Vegas
- *
- *   further details can be found here:
- *      ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/inet_diag.h>
-
-#include <net/tcp.h>
-
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-
-#define TCP_COMPOUND_ALPHA          3U
-#define TCP_COMPOUND_BETA           1U
-#define TCP_COMPOUND_GAMMA         30
-#define TCP_COMPOUND_ZETA           1
-
-/* TCP compound variables */
-struct compound {
-	u32 beg_snd_nxt;	/* right edge during last RTT */
-	u32 beg_snd_una;	/* left edge  during last RTT */
-	u32 beg_snd_cwnd;	/* saves the size of the cwnd */
-	u8 doing_vegas_now;	/* if true, do vegas for this RTT */
-	u16 cntRTT;		/* # of RTTs measured within last RTT */
-	u32 minRTT;		/* min of RTTs measured within last RTT (in usec) */
-	u32 baseRTT;		/* the min of all Vegas RTT measurements seen (in usec) */
-
-	u32 cwnd;
-	u32 dwnd;
-};
-
-/* There are several situations when we must "re-start" Vegas:
- *
- *  o when a connection is established
- *  o after an RTO
- *  o after fast recovery
- *  o when we send a packet and there is no outstanding
- *    unacknowledged data (restarting an idle connection)
- *
- * In these circumstances we cannot do a Vegas calculation at the
- * end of the first RTT, because any calculation we do is using
- * stale info -- both the saved cwnd and congestion feedback are
- * stale.
- *
- * Instead we must wait until the completion of an RTT during
- * which we actually receive ACKs.
- */
-static inline void vegas_enable(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct compound *vegas = inet_csk_ca(sk);
-
-	/* Begin taking Vegas samples next time we send something. */
-	vegas->doing_vegas_now = 1;
-
-	/* Set the beginning of the next send window. */
-	vegas->beg_snd_nxt = tp->snd_nxt;
-
-	vegas->cntRTT = 0;
-	vegas->minRTT = 0x7fffffff;
-}
-
-/* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct sock *sk)
-{
-	struct compound *vegas = inet_csk_ca(sk);
-
-	vegas->doing_vegas_now = 0;
-}
-
-static void tcp_compound_init(struct sock *sk)
-{
-	struct compound *vegas = inet_csk_ca(sk);
-	const struct tcp_sock *tp = tcp_sk(sk);
-
-	vegas->baseRTT = 0x7fffffff;
-	vegas_enable(sk);
-
-	vegas->dwnd = 0;
-	vegas->cwnd = tp->snd_cwnd;
-}
-
-/* Do RTT sampling needed for Vegas.
- * Basically we:
- *   o min-filter RTT samples from within an RTT to get the current
- *     propagation delay + queuing delay (we are min-filtering to try to
- *     avoid the effects of delayed ACKs)
- *   o min-filter RTT samples from a much longer window (forever for now)
- *     to find the propagation delay (baseRTT)
- */
-static void tcp_compound_rtt_calc(struct sock *sk, u32 usrtt)
-{
-	struct compound *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1;	/* Never allow zero rtt or baseRTT */
-
-	/* Filter to find propagation delay: */
-	if (vrtt < vegas->baseRTT)
-		vegas->baseRTT = vrtt;
-
-	/* Find the min RTT during the last RTT to find
-	 * the current prop. delay + queuing delay:
-	 */
-
-	vegas->minRTT = min(vegas->minRTT, vrtt);
-	vegas->cntRTT++;
-}
-
-static void tcp_compound_state(struct sock *sk, u8 ca_state)
-{
-
-	if (ca_state == TCP_CA_Open)
-		vegas_enable(sk);
-	else
-		vegas_disable(sk);
-}
-
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u64 div64_64(u64 dividend, u64 divisor)
-{
-	u32 d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	/* avoid 64 bit division if possible */
-	if (dividend >> 32)
-		do_div(dividend, d);
-	else
-		dividend = (u32) dividend / d;
-
-	return dividend;
-}
-
-/* calculate the quartic root of "a" using Newton-Raphson */
-static u32 qroot(u64 a)
-{
-	u32 x, x1;
-
-	/* Initial estimate is based on:
-	 * qrt(x) = exp(log(x) / 4)
-	 */
-	x = 1u << (fls64(a) >> 2);
-
-	/*
-	 * Iteration based on:
-	 *                         3
-	 * x    = ( 3 * x  +  a / x  ) / 4
-	 *  k+1          k         k
-	 */
-	do {
-		u64 x3 = x;
-
-		x1 = x;
-		x3 *= x;
-		x3 *= x;
-
-		x = (3 * x + (u32) div64_64(a, x3)) / 4;
-	} while (abs(x1 - x) > 1);
-
-	return x;
-}
-
-
-/*
- * If the connection is idle and we are restarting,
- * then we don't want to do any Vegas calculations
- * until we get fresh RTT samples.  So when we
- * restart, we reset our Vegas state to a clean
- * slate. After we get acks for this flight of
- * packets, _then_ we can make Vegas calculations
- * again.
- */
-static void tcp_compound_cwnd_event(struct sock *sk, enum tcp_ca_event event)
-{
-	if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
-		tcp_compound_init(sk);
-}
-
-static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
-				    u32 seq_rtt, u32 in_flight, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct compound *vegas = inet_csk_ca(sk);
-	u8 inc = 0;
-
-	if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) {
-		if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) {
-			vegas->cwnd = tp->snd_cwnd;
-			vegas->dwnd = 0;
-		} else
-			vegas->cwnd = tp->snd_cwnd - vegas->dwnd;
-
-	}
-
-	if (!tcp_is_cwnd_limited(sk, in_flight))
-		return;
-
-	if (vegas->cwnd <= tp->snd_ssthresh)
-		inc = 1;
-	else if (tp->snd_cwnd_cnt < tp->snd_cwnd)
-		tp->snd_cwnd_cnt++;
-
-	if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-		inc = 1;
-		tp->snd_cwnd_cnt = 0;
-	}
-
-	if (inc && tp->snd_cwnd < tp->snd_cwnd_clamp)
-		vegas->cwnd++;
-
-	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
-	 *
-	 * These are so named because they represent the approximate values
-	 * of snd_una and snd_nxt at the beginning of the current RTT. More
-	 * precisely, they represent the amount of data sent during the RTT.
-	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
-	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
-	 * bytes of data have been ACKed during the course of the RTT, giving
-	 * an "actual" rate of:
-	 *
-	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
-	 *
-	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
-	 * because delayed ACKs can cover more than one segment, so they
-	 * don't line up nicely with the boundaries of RTTs.
-	 *
-	 * Another unfortunate fact of life is that delayed ACKs delay the
-	 * advance of the left edge of our send window, so that the number
-	 * of bytes we send in an RTT is often less than our cwnd will allow.
-	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
-	 */
-
-	if (after(ack, vegas->beg_snd_nxt)) {
-		/* Do the Vegas once-per-RTT cwnd adjustment. */
-		u32 old_wnd, old_snd_cwnd;
-
-		/* Here old_wnd is essentially the window of data that was
-		 * sent during the previous RTT, and has all
-		 * been acknowledged in the course of the RTT that ended
-		 * with the ACK we just received. Likewise, old_snd_cwnd
-		 * is the cwnd during the previous RTT.
-		 */
-		if (!tp->mss_cache)
-			return;
-
-		old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
-		    tp->mss_cache;
-		old_snd_cwnd = vegas->beg_snd_cwnd;
-
-		/* Save the extent of the current window so we can use this
-		 * at the end of the next RTT.
-		 */
-		vegas->beg_snd_una = vegas->beg_snd_nxt;
-		vegas->beg_snd_nxt = tp->snd_nxt;
-		vegas->beg_snd_cwnd = tp->snd_cwnd;
-
-		/* We do the Vegas calculations only if we got enough RTT
-		 * samples that we can be reasonably sure that we got
-		 * at least one RTT sample that wasn't from a delayed ACK.
-		 * If we only had 2 samples total,
-		 * then that means we're getting only 1 ACK per RTT, which
-		 * means they're almost certainly delayed ACKs.
-		 * If  we have 3 samples, we should be OK.
-		 */
-
-		if (vegas->cntRTT > 2) {
-			u32 rtt, target_cwnd, diff;
-			u32 brtt, dwnd;
-
-			/* We have enough RTT samples, so, using the Vegas
-			 * algorithm, we determine if we should increase or
-			 * decrease cwnd, and by how much.
-			 */
-
-			/* Pluck out the RTT we are using for the Vegas
-			 * calculations. This is the min RTT seen during the
-			 * last RTT. Taking the min filters out the effects
-			 * of delayed ACKs, at the cost of noticing congestion
-			 * a bit later.
-			 */
-			rtt = vegas->minRTT;
-
-			/* Calculate the cwnd we should have, if we weren't
-			 * going too fast.
-			 *
-			 * This is:
-			 *     (actual rate in segments) * baseRTT
-			 * We keep it as a fixed point number with
-			 * V_PARAM_SHIFT bits to the right of the binary point.
-			 */
-			if (!rtt)
-				return;
-
-			brtt = vegas->baseRTT;
-			target_cwnd = ((old_wnd * brtt)
-				       << V_PARAM_SHIFT) / rtt;
-
-			/* Calculate the difference between the window we had,
-			 * and the window we would like to have. This quantity
-			 * is the "Diff" from the Arizona Vegas papers.
-			 *
-			 * Again, this is a fixed point number with
-			 * V_PARAM_SHIFT bits to the right of the binary
-			 * point.
-			 */
-
-			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
-
-			dwnd = vegas->dwnd;
-
-			if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) {
-				u64 v;
-				u32 x;
-
-				/*
-				 * The TCP Compound paper describes the choice
-				 * of "k" determines the agressiveness,
-				 * ie. slope of the response function.
-				 *
-				 * For same value as HSTCP would be 0.8
-				 * but for computaional reasons, both the
-				 * original authors and this implementation
-				 * use 0.75.
-				 */
-				v = old_wnd;
-				x = qroot(v * v * v) >> TCP_COMPOUND_ALPHA;
-				if (x > 1)
-					dwnd = x - 1;
-				else
-					dwnd = 0;
-
-				dwnd += vegas->dwnd;
-
-			} else if ((dwnd << V_PARAM_SHIFT) <
-				   (diff * TCP_COMPOUND_BETA))
-				dwnd = 0;
-			else
-				dwnd =
-				    ((dwnd << V_PARAM_SHIFT) -
-				     (diff *
-				      TCP_COMPOUND_BETA)) >> V_PARAM_SHIFT;
-
-			vegas->dwnd = dwnd;
-
-		}
-
-		/* Wipe the slate clean for the next RTT. */
-		vegas->cntRTT = 0;
-		vegas->minRTT = 0x7fffffff;
-	}
-
-	tp->snd_cwnd = vegas->cwnd + vegas->dwnd;
-}
-
-/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_compound_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
-{
-	const struct compound *ca = inet_csk_ca(sk);
-	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info *info;
-
-		info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
-					  sizeof(*info)));
-
-		info->tcpv_enabled = ca->doing_vegas_now;
-		info->tcpv_rttcnt = ca->cntRTT;
-		info->tcpv_rtt = ca->baseRTT;
-		info->tcpv_minrtt = ca->minRTT;
-	rtattr_failure:;
-	}
-}
-
-static struct tcp_congestion_ops tcp_compound = {
-	.init		= tcp_compound_init,
-	.ssthresh	= tcp_reno_ssthresh,
-	.cong_avoid	= tcp_compound_cong_avoid,
-	.rtt_sample	= tcp_compound_rtt_calc,
-	.set_state	= tcp_compound_state,
-	.cwnd_event	= tcp_compound_cwnd_event,
-	.get_info	= tcp_compound_get_info,
-
-	.owner		= THIS_MODULE,
-	.name		= "compound",
-};
-
-static int __init tcp_compound_register(void)
-{
-	BUG_ON(sizeof(struct compound) > ICSK_CA_PRIV_SIZE);
-	tcp_register_congestion_control(&tcp_compound);
-	return 0;
-}
-
-static void __exit tcp_compound_unregister(void)
-{
-	tcp_unregister_congestion_control(&tcp_compound);
-}
-
-module_init(tcp_compound_register);
-module_exit(tcp_compound_unregister);
-
-MODULE_AUTHOR("Angelo P. Castellani, Stephen Hemminger");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("TCP Compound");
-- 
cgit v1.2.3-70-g09d2


From 6150c22e2ac3f7dbe73e7ae7817785070d0cff1f Mon Sep 17 00:00:00 2001
From: "Xiaoliang (David) Wei" <davidwei79@gmail.com>
Date: Tue, 11 Jul 2006 13:03:28 -0700
Subject: [TCP] tcp_highspeed: Fix AI updates.

I think there is still a problem with the AIMD parameter update in
HighSpeed TCP code.

Line 125~138 of the code (net/ipv4/tcp_highspeed.c):

	/* Update AIMD parameters */
	if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
		while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
		       ca->ai < HSTCP_AIMD_MAX - 1)
			ca->ai++;
	} else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) {
		while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
		       ca->ai > 0)
			ca->ai--;

In fact, the second part (decreasing ca->ai) never decreases since the
while loop's inequality is in the reverse direction. This leads to
unfairness with multiple flows (once a flow happens to enjoy a higher
ca->ai, it keeps enjoying that even its cwnd decreases)

Here is a tentative fix (I also added a comment, trying to keep the
change clear):

Acked-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_highspeed.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index aaa1538c069..fa3e1aad660 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -139,14 +139,19 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
 				tp->snd_cwnd++;
 		}
 	} else {
-		/* Update AIMD parameters */
+		/* Update AIMD parameters.
+		 *
+		 * We want to guarantee that:
+		 *     hstcp_aimd_vals[ca->ai-1].cwnd <
+		 *     snd_cwnd <=
+		 *     hstcp_aimd_vals[ca->ai].cwnd
+		 */
 		if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
 			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
 			       ca->ai < HSTCP_AIMD_MAX - 1)
 				ca->ai++;
-		} else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) {
-			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
-			       ca->ai > 0)
+		} else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+			while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
 				ca->ai--;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From da952315c9c625bd513c6162613fd3fd01d91aae Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 11 Jul 2006 13:50:09 -0700
Subject: [IPCOMP]: Fix truesize after decompression

The truesize check has uncovered the fact that we forgot to update truesize
after pskb_expand_head.  Unfortunately pskb_expand_head can't update it for
us because it's used in all sorts of different contexts, some of which would
not allow truesize to be updated by itself.

So the solution for now is to simply update it in IPComp.

This patch also changes skb_put to __skb_put since we've just expanded
tailroom by exactly that amount so we know it's there (but gcc does not).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipcomp.c  | 3 ++-
 net/ipv6/ipcomp6.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 8e037484753..8a8b5cf2f7f 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -70,7 +70,8 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	if (err)
 		goto out;
 		
-	skb_put(skb, dlen - plen);
+	skb->truesize += dlen - plen;
+	__skb_put(skb, dlen - plen);
 	memcpy(skb->data, scratch, dlen);
 out:	
 	put_cpu();
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index b285b035708..7e4d1c17bfb 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -109,7 +109,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out_put_cpu;
 	}
 
-	skb_put(skb, dlen - plen);
+	skb->truesize += dlen - plen;
+	__skb_put(skb, dlen - plen);
 	memcpy(skb->data, scratch, dlen);
 	err = ipch->nexthdr;
 
-- 
cgit v1.2.3-70-g09d2


From b47b2ec19892ffc2b06ebf138ed4aa141275a1c2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 12 Jul 2006 13:29:56 -0700
Subject: [IPV4]: Fix error handling for fib_insert_node call

The error handling around fib_insert_node was broken because we always
zeroed the error before checking it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1cb65305e10..23fb9d9768e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1252,8 +1252,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	 */
 
 	if (!fa_head) {
-		fa_head = fib_insert_node(t, &err, key, plen);
 		err = 0;
+		fa_head = fib_insert_node(t, &err, key, plen);
 		if (err)
 			goto out_free_new_fa;
 	}
-- 
cgit v1.2.3-70-g09d2


From 53602f92dd3691616478a40738353694bcfef171 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 14 Jul 2006 14:49:32 -0700
Subject: [IPV4]: Clear skb cb on IP input

when data arrives at IP through loopback (and possibly other devices).
So the field needs to be cleared before it confuses the route code.
This was seen when running netem over loopback, but there are probably
other device cases. Maybe this should go into stable?

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e1a7dba2fa8..184c78ca79e 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -428,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 	}
 
+	/* Remove any debris in the socket control block */
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+
 	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
-- 
cgit v1.2.3-70-g09d2


From 5d9c5a32920c5c0e6716b0f6ed16157783dc56a4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 21 Jul 2006 14:29:53 -0700
Subject: [IPV4]: Get rid of redundant IPCB->opts initialisation

Now that we always zero the IPCB->opts in ip_rcv, it is no longer
necessary to do so before calling netif_rx for tunneled packets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c            | 1 -
 net/ipv4/ip_options.c        | 1 -
 net/ipv4/ipip.c              | 1 -
 net/ipv4/ipmr.c              | 2 --
 net/ipv4/xfrm4_mode_tunnel.c | 1 -
 net/ipv6/sit.c               | 1 -
 6 files changed, 7 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6ff9b10d956..0f9b3a31997 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -617,7 +617,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = __pskb_pull(skb, offset);
 		skb_postpull_rcsum(skb, skb->h.raw, offset);
-		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index cbcae654462..406056edc02 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -256,7 +256,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 
 	if (!opt) {
 		opt = &(IPCB(skb)->opt);
-		memset(opt, 0, sizeof(struct ip_options));
 		iph = skb->nh.raw;
 		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 		optptr = iph + sizeof(struct iphdr);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3291d5192aa..76ab50b0d6e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -487,7 +487,6 @@ static int ipip_rcv(struct sk_buff *skb)
 
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
-		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ba33f8621c6..9ccacf57f08 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1461,7 +1461,6 @@ int pim_rcv_v1(struct sk_buff * skb)
 	skb_pull(skb, (u8*)encap - skb->data);
 	skb->nh.iph = (struct iphdr *)skb->data;
 	skb->dev = reg_dev;
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
@@ -1517,7 +1516,6 @@ static int pim_rcv(struct sk_buff * skb)
 	skb_pull(skb, (u8*)encap - skb->data);
 	skb->nh.iph = (struct iphdr *)skb->data;
 	skb->dev = reg_dev;
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f8d880beb12..13cafbe56ce 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,6 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->mac.raw = memmove(skb->data - skb->mac_len,
 			       skb->mac.raw, skb->mac_len);
 	skb->nh.raw = skb->data;
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	err = 0;
 
 out:
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c56aeece2bf..836eecd7e62 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -380,7 +380,6 @@ static int ipip6_rcv(struct sk_buff *skb)
 		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
-		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
-- 
cgit v1.2.3-70-g09d2


From 0da974f4f303a6842516b764507e3c0a03f41e5a Mon Sep 17 00:00:00 2001
From: Panagiotis Issaris <takis@issaris.org>
Date: Fri, 21 Jul 2006 14:51:30 -0700
Subject: [NET]: Conversions from kmalloc+memset to k(z|c)alloc.

Signed-off-by: Panagiotis Issaris <takis@issaris.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c                            |  3 +--
 net/appletalk/ddp.c                         |  6 ++----
 net/atm/br2684.c                            |  3 +--
 net/atm/clip.c                              |  3 +--
 net/atm/lec.c                               |  3 +--
 net/atm/mpc.c                               |  3 +--
 net/atm/pppoatm.c                           |  3 +--
 net/atm/resources.c                         |  3 +--
 net/ax25/sysctl_net_ax25.c                  |  4 +---
 net/bridge/br_ioctl.c                       |  7 ++-----
 net/decnet/dn_dev.c                         |  9 ++-------
 net/decnet/dn_fib.c                         |  3 +--
 net/decnet/dn_neigh.c                       |  3 +--
 net/decnet/dn_rules.c                       |  3 +--
 net/decnet/dn_table.c                       | 11 +++--------
 net/econet/af_econet.c                      |  3 +--
 net/ieee80211/ieee80211_crypt.c             |  3 +--
 net/ieee80211/ieee80211_crypt_ccmp.c        |  3 +--
 net/ieee80211/ieee80211_crypt_wep.c         |  3 +--
 net/ieee80211/ieee80211_wx.c                |  7 ++-----
 net/ieee80211/softmac/ieee80211softmac_io.c |  3 +--
 net/ipv4/ah4.c                              |  4 +---
 net/ipv4/arp.c                              |  3 +--
 net/ipv4/devinet.c                          |  6 ++----
 net/ipv4/esp4.c                             |  4 +---
 net/ipv4/fib_hash.c                         |  6 ++----
 net/ipv4/fib_rules.c                        |  3 +--
 net/ipv4/fib_semantics.c                    |  3 +--
 net/ipv4/igmp.c                             | 12 ++++--------
 net/ipv4/inet_diag.c                        |  3 +--
 net/ipv4/ipcomp.c                           |  3 +--
 net/ipv4/ipvs/ip_vs_ctl.c                   | 10 +++-------
 net/ipv4/ipvs/ip_vs_est.c                   |  3 +--
 net/ipv4/netfilter/ipt_CLUSTERIP.c          |  3 +--
 net/ipv4/tcp_ipv4.c                         |  3 +--
 net/ipv4/udp.c                              |  3 +--
 net/ipv6/ip6_tunnel.c                       |  3 +--
 net/irda/ircomm/ircomm_core.c               |  4 +---
 net/irda/ircomm/ircomm_tty.c                |  3 +--
 net/irda/irda_device.c                      |  4 +---
 net/irda/irias_object.c                     | 24 ++++++++----------------
 net/irda/irlap.c                            |  6 ++----
 net/irda/irlap_frame.c                      |  3 +--
 net/irda/irlmp.c                            |  9 +++------
 net/irda/irnet/irnet_ppp.c                  |  3 +--
 net/irda/irttp.c                            |  9 +++------
 net/lapb/lapb_iface.c                       |  4 +---
 net/llc/llc_core.c                          |  3 +--
 net/netlink/af_netlink.c                    | 13 ++++---------
 net/rxrpc/connection.c                      |  6 ++----
 net/rxrpc/peer.c                            |  3 +--
 net/rxrpc/transport.c                       |  6 ++----
 net/sched/act_api.c                         |  9 +++------
 net/sched/act_pedit.c                       |  3 +--
 net/sched/act_police.c                      |  6 ++----
 net/sched/cls_basic.c                       |  6 ++----
 net/sched/cls_fw.c                          |  6 ++----
 net/sched/cls_route.c                       |  9 +++------
 net/sched/cls_rsvp.h                        |  9 +++------
 net/sched/cls_tcindex.c                     | 12 ++++--------
 net/sched/cls_u32.c                         | 15 +++++----------
 net/sched/em_meta.c                         |  3 +--
 net/sched/ematch.c                          |  3 +--
 net/sched/estimator.c                       |  3 +--
 net/sched/sch_cbq.c                         |  3 +--
 net/sched/sch_generic.c                     |  3 +--
 net/sched/sch_gred.c                        |  3 +--
 net/sched/sch_hfsc.c                        |  3 +--
 net/sched/sch_htb.c                         |  3 +--
 net/sunrpc/auth_gss/auth_gss.c              |  9 +++------
 net/sunrpc/auth_gss/gss_krb5_mech.c         |  3 +--
 net/sunrpc/auth_gss/gss_mech_switch.c       |  3 +--
 net/sunrpc/auth_gss/gss_spkm3_mech.c        |  3 +--
 net/sunrpc/auth_gss/gss_spkm3_token.c       |  3 +--
 net/sunrpc/clnt.c                           |  3 +--
 net/sunrpc/stats.c                          |  7 +------
 net/sunrpc/svc.c                            |  6 ++----
 net/sunrpc/svcsock.c                        |  3 +--
 net/sunrpc/xprt.c                           |  3 +--
 net/sunrpc/xprtsock.c                       |  6 ++----
 net/tipc/bearer.c                           |  6 ++----
 net/tipc/cluster.c                          |  8 ++------
 net/tipc/link.c                             |  3 +--
 net/tipc/name_table.c                       | 16 ++++------------
 net/tipc/net.c                              |  5 +----
 net/tipc/port.c                             |  5 ++---
 net/tipc/subscr.c                           |  3 +--
 net/tipc/user_reg.c                         |  3 +--
 net/tipc/zone.c                             |  3 +--
 net/unix/af_unix.c                          |  3 +--
 net/wanrouter/af_wanpipe.c                  |  9 +++------
 net/wanrouter/wanmain.c                     |  9 +++------
 net/xfrm/xfrm_policy.c                      |  3 +--
 net/xfrm/xfrm_state.c                       |  3 +--
 94 files changed, 154 insertions(+), 334 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 458031bfff5..0ade0c63fdf 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -542,12 +542,11 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 	 * so it cannot "appear" on us.
 	 */
 	if (!grp) { /* need to add a new group */
-		grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL);
+		grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
 		if (!grp)
 			goto out_free_unregister;
 					
 		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
-		memset(grp, 0, sizeof(struct vlan_group));
 		grp->real_dev_ifindex = real_dev->ifindex;
 
 		hlist_add_head_rcu(&grp->hlist, 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 5ee96d4b40e..96dc6bb52d1 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -227,12 +227,11 @@ static void atif_drop_device(struct net_device *dev)
 static struct atalk_iface *atif_add_device(struct net_device *dev,
 					   struct atalk_addr *sa)
 {
-	struct atalk_iface *iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+	struct atalk_iface *iface = kzalloc(sizeof(*iface), GFP_KERNEL);
 
 	if (!iface)
 		goto out;
 
-	memset(iface, 0, sizeof(*iface));
 	dev_hold(dev);
 	iface->dev = dev;
 	dev->atalk_ptr = iface;
@@ -559,12 +558,11 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint)
 	}
 
 	if (!rt) {
-		rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
+		rt = kzalloc(sizeof(*rt), GFP_ATOMIC);
 
 		retval = -ENOBUFS;
 		if (!rt)
 			goto out_unlock;
-		memset(rt, 0, sizeof(*rt));
 
 		rt->next = atalk_routes;
 		atalk_routes = rt;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index a487233dc46..d00cca97eb3 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -508,10 +508,9 @@ Note: we do not have explicit unassign, but look at _push()
 
 	if (copy_from_user(&be, arg, sizeof be))
 		return -EFAULT;
-	brvcc = kmalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
+	brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
 	if (!brvcc)
 		return -ENOMEM;
-	memset(brvcc, 0, sizeof(struct br2684_vcc));
 	write_lock_irq(&devs_lock);
 	net_dev = br2684_find_dev(&be.ifspec);
 	if (net_dev == NULL) {
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 2e62105d91b..7ce7bfe3fba 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -929,12 +929,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
 	struct seq_file *seq;
 	int rc = -EAGAIN;
 
-	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state) {
 		rc = -ENOMEM;
 		goto out_kfree;
 	}
-	memset(state, 0, sizeof(*state));
 	state->ns.neigh_sub_iter = clip_seq_sub_iter;
 
 	rc = seq_open(file, &arp_seq_ops);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4b68a18171c..b4aa489849d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1811,12 +1811,11 @@ make_entry(struct lec_priv *priv, unsigned char *mac_addr)
 {
         struct lec_arp_table *to_return;
 
-        to_return = kmalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
+        to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
         if (!to_return) {
                 printk("LEC: Arp entry kmalloc failed\n");
                 return NULL;
         }
-        memset(to_return, 0, sizeof(struct lec_arp_table));
         memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
         init_timer(&to_return->timer);
         to_return->timer.function = lec_arp_expire_arp;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 9aafe1e2f04..00704661e83 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -258,10 +258,9 @@ static struct mpoa_client *alloc_mpc(void)
 {
 	struct mpoa_client *mpc;
 
-	mpc = kmalloc(sizeof (struct mpoa_client), GFP_KERNEL);
+	mpc = kzalloc(sizeof (struct mpoa_client), GFP_KERNEL);
 	if (mpc == NULL)
 		return NULL;
-	memset(mpc, 0, sizeof(struct mpoa_client));
 	rwlock_init(&mpc->ingress_lock);
 	rwlock_init(&mpc->egress_lock);
 	mpc->next = mpcs;
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 76a7d8ff6c0..19d5dfc0702 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -287,10 +287,9 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
 	if (be.encaps != PPPOATM_ENCAPS_AUTODETECT &&
 	    be.encaps != PPPOATM_ENCAPS_VC && be.encaps != PPPOATM_ENCAPS_LLC)
 		return -EINVAL;
-	pvcc = kmalloc(sizeof(*pvcc), GFP_KERNEL);
+	pvcc = kzalloc(sizeof(*pvcc), GFP_KERNEL);
 	if (pvcc == NULL)
 		return -ENOMEM;
-	memset(pvcc, 0, sizeof(*pvcc));
 	pvcc->atmvcc = atmvcc;
 	pvcc->old_push = atmvcc->push;
 	pvcc->old_pop = atmvcc->pop;
diff --git a/net/atm/resources.c b/net/atm/resources.c
index de25c6408b0..529f7e64aa2 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -33,10 +33,9 @@ static struct atm_dev *__alloc_atm_dev(const char *type)
 {
 	struct atm_dev *dev;
 
-	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return NULL;
-	memset(dev, 0, sizeof(*dev));
 	dev->type = type;
 	dev->signal = ATM_PHY_SIG_UNKNOWN;
 	dev->link_rate = ATM_OC3_PCR;
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 369a75b160f..867d4253797 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -203,13 +203,11 @@ void ax25_register_sysctl(void)
 	for (ax25_table_size = sizeof(ctl_table), ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
 		ax25_table_size += sizeof(ctl_table);
 
-	if ((ax25_table = kmalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
+	if ((ax25_table = kzalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
 		spin_unlock_bh(&ax25_dev_lock);
 		return;
 	}
 
-	memset(ax25_table, 0x00, ax25_table_size);
-
 	for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) {
 		ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC);
 		if (!child) {
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 159fb840982..4e4119a1213 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -162,12 +162,10 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		if (num > BR_MAX_PORTS)
 			num = BR_MAX_PORTS;
 
-		indices = kmalloc(num*sizeof(int), GFP_KERNEL);
+		indices = kcalloc(num, sizeof(int), GFP_KERNEL);
 		if (indices == NULL)
 			return -ENOMEM;
 
-		memset(indices, 0, num*sizeof(int));
-
 		get_port_ifindices(br, indices, num);
 		if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
 			num =  -EFAULT;
@@ -327,11 +325,10 @@ static int old_deviceless(void __user *uarg)
 
 		if (args[2] >= 2048)
 			return -ENOMEM;
-		indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL);
+		indices = kcalloc(args[2], sizeof(int), GFP_KERNEL);
 		if (indices == NULL)
 			return -ENOMEM;
 
-		memset(indices, 0, args[2]*sizeof(int));
 		args[2] = get_bridge_ifindices(indices, args[2]);
 
 		ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 98a25208440..476455fbdb0 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -413,11 +413,7 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
 {
 	struct dn_ifaddr *ifa;
 
-	ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
-
-	if (ifa) {
-		memset(ifa, 0, sizeof(*ifa));
-	}
+	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
 
 	return ifa;
 }
@@ -1105,10 +1101,9 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 		return NULL;
 
 	*err = -ENOBUFS;
-	if ((dn_db = kmalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
+	if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
 		return NULL;
 
-	memset(dn_db, 0, sizeof(struct dn_dev));
 	memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
 	smp_wmb();
 	dev->dn_ptr = dn_db;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 0375077391b..fa20e2efcfc 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -283,11 +283,10 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
 			goto err_inval;
 	}
 
-	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
+	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
 	err = -ENOBUFS;
 	if (fi == NULL)
 		goto failure;
-	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct dn_fib_nh));
 
 	fi->fib_protocol = r->rtm_protocol;
 	fi->fib_nhs = nhs;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 5ce9c9e0565..ff0ebe99137 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -580,12 +580,11 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
 
 	if (!s)
 		goto out;
 
-	memset(s, 0, sizeof(*s));
 	rc = seq_open(file, &dn_neigh_seq_ops);
 	if (rc)
 		goto out_kfree;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 22f321d9bf9..6986be754ef 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -151,10 +151,9 @@ int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		}
 	}
 
-	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
 	if (!new_r)
 		return -ENOMEM;
-	memset(new_r, 0, sizeof(*new_r));
 
 	if (rta[RTA_SRC-1])
 		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 37d9d0a1ac8..e926c952e36 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -158,12 +158,10 @@ static void dn_rehash_zone(struct dn_zone *dz)
 			break;
 	}
 
-	ht = kmalloc(new_divisor*sizeof(struct dn_fib_node*), GFP_KERNEL);
-
+	ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
 	if (ht == NULL)
 		return;
 
-	memset(ht, 0, new_divisor*sizeof(struct dn_fib_node *));
 	write_lock_bh(&dn_fib_tables_lock);
 	old_ht = dz->dz_hash;
 	dz->dz_hash = ht;
@@ -184,11 +182,10 @@ static void dn_free_node(struct dn_fib_node *f)
 static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
 {
 	int i;
-	struct dn_zone *dz = kmalloc(sizeof(struct dn_zone), GFP_KERNEL);
+	struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
 	if (!dz)
 		return NULL;
 
-	memset(dz, 0, sizeof(struct dn_zone));
 	if (z) {
 		dz->dz_divisor = 16;
 		dz->dz_hashmask = 0x0F;
@@ -197,14 +194,12 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
 		dz->dz_hashmask = 0;
 	}
 
-	dz->dz_hash = kmalloc(dz->dz_divisor*sizeof(struct dn_fib_node *), GFP_KERNEL);
-
+	dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
 	if (!dz->dz_hash) {
 		kfree(dz);
 		return NULL;
 	}
 
-	memset(dz->dz_hash, 0, dz->dz_divisor*sizeof(struct dn_fib_node*));
 	dz->dz_order = z;
 	dz->dz_mask = dnet_make_mask(z);
 
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 309ae4c6549..4d66aac1348 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -673,12 +673,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
 		edev = dev->ec_ptr;
 		if (edev == NULL) {
 			/* Magic up a new one. */
-			edev = kmalloc(sizeof(struct ec_device), GFP_KERNEL);
+			edev = kzalloc(sizeof(struct ec_device), GFP_KERNEL);
 			if (edev == NULL) {
 				err = -ENOMEM;
 				break;
 			}
-			memset(edev, 0, sizeof(struct ec_device));
 			dev->ec_ptr = edev;
 		} else
 			net2dev_map[edev->net] = NULL;
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index cb71d794a7d..5ed0a98b2d7 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -110,11 +110,10 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops)
 	unsigned long flags;
 	struct ieee80211_crypto_alg *alg;
 
-	alg = kmalloc(sizeof(*alg), GFP_KERNEL);
+	alg = kzalloc(sizeof(*alg), GFP_KERNEL);
 	if (alg == NULL)
 		return -ENOMEM;
 
-	memset(alg, 0, sizeof(*alg));
 	alg->ops = ops;
 
 	spin_lock_irqsave(&ieee80211_crypto_lock, flags);
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 492647382ad..ed90a8af144 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -76,10 +76,9 @@ static void *ieee80211_ccmp_init(int key_idx)
 {
 	struct ieee80211_ccmp_data *priv;
 
-	priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
+	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
 	if (priv == NULL)
 		goto fail;
-	memset(priv, 0, sizeof(*priv));
 	priv->key_idx = key_idx;
 
 	priv->tfm = crypto_alloc_tfm("aes", 0);
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index c5a87724aab..0ebf235f693 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -39,10 +39,9 @@ static void *prism2_wep_init(int keyidx)
 {
 	struct prism2_wep_data *priv;
 
-	priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
+	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
 	if (priv == NULL)
 		goto fail;
-	memset(priv, 0, sizeof(*priv));
 	priv->key_idx = keyidx;
 
 	priv->tfm = crypto_alloc_tfm("arc4", 0);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index a78c4f845f6..5cb9cfd3539 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -369,11 +369,10 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 		struct ieee80211_crypt_data *new_crypt;
 
 		/* take WEP into use */
-		new_crypt = kmalloc(sizeof(struct ieee80211_crypt_data),
+		new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
 				    GFP_KERNEL);
 		if (new_crypt == NULL)
 			return -ENOMEM;
-		memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data));
 		new_crypt->ops = ieee80211_get_crypto_ops("WEP");
 		if (!new_crypt->ops) {
 			request_module("ieee80211_crypt_wep");
@@ -616,13 +615,11 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 
 		ieee80211_crypt_delayed_deinit(ieee, crypt);
 
-		new_crypt = (struct ieee80211_crypt_data *)
-		    kmalloc(sizeof(*new_crypt), GFP_KERNEL);
+		new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL);
 		if (new_crypt == NULL) {
 			ret = -ENOMEM;
 			goto done;
 		}
-		memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data));
 		new_crypt->ops = ops;
 		if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
 			new_crypt->priv = new_crypt->ops->init(idx);
diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 8cc8b20f5cd..6ae5a1dc795 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -96,8 +96,7 @@ ieee80211softmac_alloc_mgt(u32 size)
 	if(size > IEEE80211_DATA_LEN)
 		return NULL;
 	/* Allocate the frame */
-	data = kmalloc(size, GFP_ATOMIC);
-	memset(data, 0, size);
+	data = kzalloc(size, GFP_ATOMIC);
 	return data;
 }
 
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 8e748be36c5..1366bc6ce6a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -215,12 +215,10 @@ static int ah_init_state(struct xfrm_state *x)
 	if (x->encap)
 		goto error;
 
-	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
 	if (ahp == NULL)
 		return -ENOMEM;
 
-	memset(ahp, 0, sizeof(*ahp));
-
 	ahp->key = x->aalg->alg_key;
 	ahp->key_len = (x->aalg->alg_key_len+7)/8;
 	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7b51b3bdb54..c8a3723bc00 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1372,12 +1372,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
        
 	if (!s)
 		goto out;
 
-	memset(s, 0, sizeof(*s));
 	rc = seq_open(file, &arp_seq_ops);
 	if (rc)
 		goto out_kfree;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7c65e9e5ec..a6cc31d911e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -93,10 +93,9 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
 
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
-	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
 
 	if (ifa) {
-		memset(ifa, 0, sizeof(*ifa));
 		INIT_RCU_HEAD(&ifa->rcu_head);
 	}
 
@@ -140,10 +139,9 @@ struct in_device *inetdev_init(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
+	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 	if (!in_dev)
 		goto out;
-	memset(in_dev, 0, sizeof(*in_dev));
 	INIT_RCU_HEAD(&in_dev->rcu_head);
 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4e112738b3f..fc2f8ce441d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -316,12 +316,10 @@ static int esp_init_state(struct xfrm_state *x)
 	if (x->ealg == NULL)
 		goto error;
 
-	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+	esp = kzalloc(sizeof(*esp), GFP_KERNEL);
 	if (esp == NULL)
 		return -ENOMEM;
 
-	memset(esp, 0, sizeof(*esp));
-
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
 
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 3c1d32ad35f..72c633b357c 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -204,11 +204,10 @@ static struct fn_zone *
 fn_new_zone(struct fn_hash *table, int z)
 {
 	int i;
-	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
+	struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
 	if (!fz)
 		return NULL;
 
-	memset(fz, 0, sizeof(struct fn_zone));
 	if (z) {
 		fz->fz_divisor = 16;
 	} else {
@@ -1046,7 +1045,7 @@ static int fib_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
        
 	if (!s)
 		goto out;
@@ -1057,7 +1056,6 @@ static int fib_seq_open(struct inode *inode, struct file *file)
 
 	seq	     = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 773b12ba4e3..79b04718bdf 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -196,10 +196,9 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 	}
 
-	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
 	if (!new_r)
 		return -ENOMEM;
-	memset(new_r, 0, sizeof(*new_r));
 
 	if (rta[RTA_SRC-1])
 		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5f87533684d..63864bb846e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -709,11 +709,10 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 			goto failure;
 	}
 
-	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 	if (fi == NULL)
 		goto failure;
 	fib_info_cnt++;
-	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
 
 	fi->fib_protocol = r->rtm_protocol;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d299c8e547d..9f4b752f5a3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1028,10 +1028,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
+	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
-	memset(pmc, 0, sizeof(*pmc));
 	spin_lock_bh(&im->lock);
 	pmc->interface = im->interface;
 	in_dev_hold(in_dev);
@@ -1529,10 +1528,9 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
 		if (!psf)
 			return -ENOBUFS;
-		memset(psf, 0, sizeof(*psf));
 		psf->sf_inaddr = *psfsrc;
 		if (psf_prev) {
 			psf_prev->sf_next = psf;
@@ -2380,7 +2378,7 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
 
 	if (!s)
 		goto out;
@@ -2390,7 +2388,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 
 	seq = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
@@ -2555,7 +2552,7 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
 
 	if (!s)
 		goto out;
@@ -2565,7 +2562,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 
 	seq = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e7e41b66c7..492858e6faf 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -909,11 +909,10 @@ static int __init inet_diag_init(void)
 					  sizeof(struct inet_diag_handler *));
 	int err = -ENOMEM;
 
-	inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL);
+	inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
 	if (!inet_diag_table)
 		goto out;
 
-	memset(inet_diag_table, 0, inet_diag_table_size);
 	idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
 					THIS_MODULE);
 	if (idiagnl == NULL)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 8a8b5cf2f7f..a0c28b2b756 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -410,11 +410,10 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto out;
 
 	err = -ENOMEM;
-	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
 	if (!ipcd)
 		goto out;
 
-	memset(ipcd, 0, sizeof(*ipcd));
 	x->props.header_len = 0;
 	if (x->props.mode)
 		x->props.header_len += sizeof(struct iphdr);
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f28ec688216..6a28fafe910 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -735,12 +735,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 		return -EINVAL;
 
-	dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
 	if (dest == NULL) {
 		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
 		return -ENOMEM;
 	}
-	memset(dest, 0, sizeof(struct ip_vs_dest));
 
 	dest->protocol = svc->protocol;
 	dest->vaddr = svc->addr;
@@ -1050,14 +1049,12 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
 		goto out_mod_dec;
 	}
 
-	svc = (struct ip_vs_service *)
-		kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
 	if (svc == NULL) {
 		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	memset(svc, 0, sizeof(struct ip_vs_service));
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 1);
@@ -1797,7 +1794,7 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
 
 	if (!s)
 		goto out;
@@ -1808,7 +1805,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
 
 	seq	     = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 4c1940381ba..7d68b80c4c1 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -123,11 +123,10 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *est;
 
-	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	est = kzalloc(sizeof(*est), GFP_KERNEL);
 	if (est == NULL)
 		return -ENOMEM;
 
-	memset(est, 0, sizeof(*est));
 	est->stats = stats;
 	est->last_conns = stats->conns;
 	est->cps = stats->cps<<10;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index cbffeae3f56..d994c5f5744 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -172,11 +172,10 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 	struct clusterip_config *c;
 	char buffer[16];
 
-	c = kmalloc(sizeof(*c), GFP_ATOMIC);
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c)
 		return NULL;
 
-	memset(c, 0, sizeof(*c));
 	c->dev = dev;
 	c->clusterip = ip;
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a891133f00e..f6f39e81429 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1640,10 +1640,9 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
 	if (unlikely(afinfo == NULL))
 		return -EINVAL;
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return -ENOMEM;
-	memset(s, 0, sizeof(*s));
 	s->family		= afinfo->family;
 	s->seq_ops.start	= tcp_seq_start;
 	s->seq_ops.next		= tcp_seq_next;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9bfcddad695..f136cec96d9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1468,11 +1468,10 @@ static int udp_seq_open(struct inode *inode, struct file *file)
 	struct udp_seq_afinfo *afinfo = PDE(inode)->data;
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
 
 	if (!s)
 		goto out;
-	memset(s, 0, sizeof(*s));
 	s->family		= afinfo->family;
 	s->seq_ops.start	= udp_seq_start;
 	s->seq_ops.next		= udp_seq_next;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index bc77c0e1a94..84d7ebdb9d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -567,10 +567,9 @@ static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
 
 	int opt_len = sizeof(*opt) + 8;
 
-	if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
+	if (!(opt = kzalloc(opt_len, GFP_ATOMIC))) {
 		return NULL;
 	}
-	memset(opt, 0, opt_len);
 	opt->tot_len = opt_len;
 	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
 	opt->opt_nflen = 8;
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 9c4a902a9db..ad6b6af3dd9 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -115,12 +115,10 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
 
 	IRDA_ASSERT(ircomm != NULL, return NULL;);
 
-	self = kmalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+	self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
 	if (self == NULL)
 		return NULL;
 
-	memset(self, 0, sizeof(struct ircomm_cb));
-
 	self->notify = *notify;
 	self->magic = IRCOMM_MAGIC;
 
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index cde3b84d4a0..3bcdb467efc 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -379,12 +379,11 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
 	self = hashbin_lock_find(ircomm_tty, line, NULL);
 	if (!self) {
 		/* No, so make new instance */
-		self = kmalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
+		self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
 		if (self == NULL) {
 			IRDA_ERROR("%s(), kmalloc failed!\n", __FUNCTION__);
 			return -ENOMEM;
 		}
-		memset(self, 0, sizeof(struct ircomm_tty_cb));
 		
 		self->magic = IRCOMM_TTY_MAGIC;
 		self->flow = FLOW_STOP;
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index ba40e5495f5..7e7a31798d8 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -401,12 +401,10 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
 	}
 
 	/* Allocate dongle info for this instance */
-	dongle = kmalloc(sizeof(dongle_t), GFP_KERNEL);
+	dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
 	if (!dongle)
 		goto out;
 
-	memset(dongle, 0, sizeof(dongle_t));
-
 	/* Bind the registration info to this particular instance */
 	dongle->issue = reg;
 	dongle->dev = dev;
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
index 82e665c7999..a154b1d71c0 100644
--- a/net/irda/irias_object.c
+++ b/net/irda/irias_object.c
@@ -82,13 +82,12 @@ struct ias_object *irias_new_object( char *name, int id)
 
 	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
 
-	obj = kmalloc(sizeof(struct ias_object), GFP_ATOMIC);
+	obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC);
 	if (obj == NULL) {
 		IRDA_WARNING("%s(), Unable to allocate object!\n",
 			     __FUNCTION__);
 		return NULL;
 	}
-	memset(obj, 0, sizeof( struct ias_object));
 
 	obj->magic = IAS_OBJECT_MAGIC;
 	obj->name = strndup(name, IAS_MAX_CLASSNAME);
@@ -346,13 +345,12 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
 	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
 	IRDA_ASSERT(name != NULL, return;);
 
-	attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
+	attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
 		return;
 	}
-	memset(attrib, 0, sizeof( struct ias_attrib));
 
 	attrib->magic = IAS_ATTRIB_MAGIC;
 	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -382,13 +380,12 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
 	IRDA_ASSERT(name != NULL, return;);
 	IRDA_ASSERT(octets != NULL, return;);
 
-	attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
+	attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
 		return;
 	}
-	memset(attrib, 0, sizeof( struct ias_attrib));
 
 	attrib->magic = IAS_ATTRIB_MAGIC;
 	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -416,13 +413,12 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
 	IRDA_ASSERT(name != NULL, return;);
 	IRDA_ASSERT(value != NULL, return;);
 
-	attrib = kmalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
+	attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
 	if (attrib == NULL) {
 		IRDA_WARNING("%s: Unable to allocate attribute!\n",
 			     __FUNCTION__);
 		return;
 	}
-	memset(attrib, 0, sizeof( struct ias_attrib));
 
 	attrib->magic = IAS_ATTRIB_MAGIC;
 	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
@@ -443,12 +439,11 @@ struct ias_value *irias_new_integer_value(int integer)
 {
 	struct ias_value *value;
 
-	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
 	if (value == NULL) {
 		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	memset(value, 0, sizeof(struct ias_value));
 
 	value->type = IAS_INTEGER;
 	value->len = 4;
@@ -469,12 +464,11 @@ struct ias_value *irias_new_string_value(char *string)
 {
 	struct ias_value *value;
 
-	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
 	if (value == NULL) {
 		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	memset( value, 0, sizeof( struct ias_value));
 
 	value->type = IAS_STRING;
 	value->charset = CS_ASCII;
@@ -495,12 +489,11 @@ struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
 {
 	struct ias_value *value;
 
-	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
 	if (value == NULL) {
 		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	memset(value, 0, sizeof(struct ias_value));
 
 	value->type = IAS_OCT_SEQ;
 	/* Check length */
@@ -522,12 +515,11 @@ struct ias_value *irias_new_missing_value(void)
 {
 	struct ias_value *value;
 
-	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
 	if (value == NULL) {
 		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	memset(value, 0, sizeof(struct ias_value));
 
 	value->type = IAS_MISSING;
 	value->len = 0;
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 9199c124d20..e7852a07495 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -116,11 +116,10 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
 	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
 
 	/* Initialize the irlap structure. */
-	self = kmalloc(sizeof(struct irlap_cb), GFP_KERNEL);
+	self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL);
 	if (self == NULL)
 		return NULL;
 
-	memset(self, 0, sizeof(struct irlap_cb));
 	self->magic = LAP_MAGIC;
 
 	/* Make a binding between the layers */
@@ -1222,7 +1221,7 @@ static int irlap_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct irlap_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct irlap_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
        
 	if (!s)
 		goto out;
@@ -1238,7 +1237,6 @@ static int irlap_seq_open(struct inode *inode, struct file *file)
 
 	seq	     = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index fa5c144ecc0..ccb983bf0f4 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -422,11 +422,10 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
 		return;
 	}
 
-	if ((discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
+	if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
 		IRDA_WARNING("%s: kmalloc failed!\n", __FUNCTION__);
 		return;
 	}
-	memset(discovery, 0, sizeof(discovery_t));
 
 	discovery->data.daddr = info->daddr;
 	discovery->data.saddr = self->saddr;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 5ee79462b30..c440913dee1 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -78,10 +78,9 @@ int __init irlmp_init(void)
 {
 	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
 	/* Initialize the irlmp structure. */
-	irlmp = kmalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
+	irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
 	if (irlmp == NULL)
 		return -ENOMEM;
-	memset(irlmp, 0, sizeof(struct irlmp_cb));
 
 	irlmp->magic = LMP_MAGIC;
 
@@ -160,12 +159,11 @@ struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid)
 		return NULL;
 
 	/* Allocate new instance of a LSAP connection */
-	self = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
+	self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
 	if (self == NULL) {
 		IRDA_ERROR("%s: can't allocate memory\n", __FUNCTION__);
 		return NULL;
 	}
-	memset(self, 0, sizeof(struct lsap_cb));
 
 	self->magic = LMP_LSAP_MAGIC;
 	self->slsap_sel = slsap_sel;
@@ -288,12 +286,11 @@ void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify)
 	/*
 	 *  Allocate new instance of a LSAP connection
 	 */
-	lap = kmalloc(sizeof(struct lap_cb), GFP_KERNEL);
+	lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL);
 	if (lap == NULL) {
 		IRDA_ERROR("%s: unable to kmalloc\n", __FUNCTION__);
 		return;
 	}
-	memset(lap, 0, sizeof(struct lap_cb));
 
 	lap->irlap = irlap;
 	lap->magic = LMP_LAP_MAGIC;
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index e53bf9e0053..a1e502ff907 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -476,11 +476,10 @@ dev_irnet_open(struct inode *	inode,
 #endif /* SECURE_DEVIRNET */
 
   /* Allocate a private structure for this IrNET instance */
-  ap = kmalloc(sizeof(*ap), GFP_KERNEL);
+  ap = kzalloc(sizeof(*ap), GFP_KERNEL);
   DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
 
   /* initialize the irnet structure */
-  memset(ap, 0, sizeof(*ap));
   ap->file = file;
 
   /* PPP channel setup */
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 7a3ccb8a669..42acf1cde73 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -85,10 +85,9 @@ static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
  */
 int __init irttp_init(void)
 {
-	irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL);
+	irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL);
 	if (irttp == NULL)
 		return -ENOMEM;
-	memset(irttp, 0, sizeof(struct irttp_cb));
 
 	irttp->magic = TTP_MAGIC;
 
@@ -390,12 +389,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
 		return NULL;
 	}
 
-	self = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
+	self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
 	if (self == NULL) {
 		IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__);
 		return NULL;
 	}
-	memset(self, 0, sizeof(struct tsap_cb));
 	spin_lock_init(&self->lock);
 
 	/* Initialise todo timer */
@@ -1877,7 +1875,7 @@ static int irttp_seq_open(struct inode *inode, struct file *file)
 	int rc = -ENOMEM;
 	struct irttp_iter_state *s;
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 
@@ -1887,7 +1885,6 @@ static int irttp_seq_open(struct inode *inode, struct file *file)
 
 	seq	     = file->private_data;
 	seq->private = s;
-	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index aea6616cea3..d504eed416f 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -115,14 +115,12 @@ static struct lapb_cb *lapb_devtostruct(struct net_device *dev)
  */
 static struct lapb_cb *lapb_create_cb(void)
 {
-	struct lapb_cb *lapb = kmalloc(sizeof(*lapb), GFP_ATOMIC);
+	struct lapb_cb *lapb = kzalloc(sizeof(*lapb), GFP_ATOMIC);
 
 
 	if (!lapb)
 		goto out;
 
-	memset(lapb, 0x00, sizeof(*lapb));
-
 	skb_queue_head_init(&lapb->write_queue);
 	skb_queue_head_init(&lapb->ack_queue);
 
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index bd242a49514..d12413cff5b 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -33,10 +33,9 @@ unsigned char llc_station_mac_sa[ETH_ALEN];
  */
 static struct llc_sap *llc_sap_alloc(void)
 {
-	struct llc_sap *sap = kmalloc(sizeof(*sap), GFP_ATOMIC);
+	struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC);
 
 	if (sap) {
-		memset(sap, 0, sizeof(*sap));
 		sap->state = LLC_SAP_STATE_ACTIVE;
 		memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN);
 		rwlock_init(&sap->sk_list.lock);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 55c0adc8f11..b85c1f9f128 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -562,10 +562,9 @@ static int netlink_alloc_groups(struct sock *sk)
 	if (err)
 		return err;
 
-	nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL);
+	nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
 	if (nlk->groups == NULL)
 		return -ENOMEM;
-	memset(nlk->groups, 0, NLGRPSZ(groups));
 	nlk->ngroups = groups;
 	return 0;
 }
@@ -1393,11 +1392,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct sock *sk;
 	struct netlink_sock *nlk;
 
-	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 	if (cb == NULL)
 		return -ENOBUFS;
 
-	memset(cb, 0, sizeof(*cb));
 	cb->dump = dump;
 	cb->done = done;
 	cb->nlh = nlh;
@@ -1668,7 +1666,7 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
 	struct nl_seq_iter *iter;
 	int err;
 
-	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return -ENOMEM;
 
@@ -1678,7 +1676,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
 		return err;
 	}
 
-	memset(iter, 0, sizeof(*iter));
 	seq = file->private_data;
 	seq->private = iter;
 	return 0;
@@ -1747,15 +1744,13 @@ static int __init netlink_proto_init(void)
 	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
 		netlink_skb_parms_too_large();
 
-	nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
+	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
 	if (!nl_table) {
 enomem:
 		printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
 		return -ENOMEM;
 	}
 
-	memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
-
 	if (num_physpages >= (128 * 1024))
 		max = num_physpages >> (21 - PAGE_SHIFT);
 	else
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index 573b572f8f9..93d2c55ad2d 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -58,13 +58,12 @@ static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
 	_enter("%p",peer);
 
 	/* allocate and initialise a connection record */
-	conn = kmalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
+	conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
 	if (!conn) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(conn, 0, sizeof(struct rxrpc_connection));
 	atomic_set(&conn->usage, 1);
 
 	INIT_LIST_HEAD(&conn->link);
@@ -535,13 +534,12 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
 		return -EINVAL;
 	}
 
-	msg = kmalloc(sizeof(struct rxrpc_message), alloc_flags);
+	msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
 	if (!msg) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(msg, 0, sizeof(*msg));
 	atomic_set(&msg->usage, 1);
 
 	INIT_LIST_HEAD(&msg->link);
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
index ed38f5b17c1..8a275157a3b 100644
--- a/net/rxrpc/peer.c
+++ b/net/rxrpc/peer.c
@@ -58,13 +58,12 @@ static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
 	_enter("%p,%08x", trans, ntohl(addr));
 
 	/* allocate and initialise a peer record */
-	peer = kmalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
+	peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
 	if (!peer) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(peer, 0, sizeof(struct rxrpc_peer));
 	atomic_set(&peer->usage, 1);
 
 	INIT_LIST_HEAD(&peer->link);
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index dbe6105e83a..465efc86fcc 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -68,11 +68,10 @@ int rxrpc_create_transport(unsigned short port,
 
 	_enter("%hu", port);
 
-	trans = kmalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
+	trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
 	if (!trans)
 		return -ENOMEM;
 
-	memset(trans, 0, sizeof(struct rxrpc_transport));
 	atomic_set(&trans->usage, 1);
 	INIT_LIST_HEAD(&trans->services);
 	INIT_LIST_HEAD(&trans->link);
@@ -312,13 +311,12 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
 
 	_enter("");
 
-	msg = kmalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
+	msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
 	if (!msg) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(msg, 0, sizeof(*msg));
 	atomic_set(&msg->usage, 1);
 	list_add_tail(&msg->link,msgq);
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9affeeedf10..a2587b52e53 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -312,10 +312,9 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
 	}
 
 	*err = -ENOMEM;
-	a = kmalloc(sizeof(*a), GFP_KERNEL);
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
 	if (a == NULL)
 		goto err_mod;
-	memset(a, 0, sizeof(*a));
 
 	/* backward compatibility for policer */
 	if (name == NULL)
@@ -492,10 +491,9 @@ tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
 	index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]);
 
 	*err = -ENOMEM;
-	a = kmalloc(sizeof(struct tc_action), GFP_KERNEL);
+	a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
 	if (a == NULL)
 		return NULL;
-	memset(a, 0, sizeof(struct tc_action));
 
 	*err = -EINVAL;
 	a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]);
@@ -531,12 +529,11 @@ static struct tc_action *create_a(int i)
 {
 	struct tc_action *act;
 
-	act = kmalloc(sizeof(*act), GFP_KERNEL);
+	act = kzalloc(sizeof(*act), GFP_KERNEL);
 	if (act == NULL) {
 		printk("create_a: failed to alloc!\n");
 		return NULL;
 	}
-	memset(act, 0, sizeof(*act));
 	act->order = i;
 	return act;
 }
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 58b3a865204..f257475e0e0 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -209,10 +209,9 @@ tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
 	s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
 
 	/* netlink spinlocks held above us - must use ATOMIC */
-	opt = kmalloc(s, GFP_ATOMIC);
+	opt = kzalloc(s, GFP_ATOMIC);
 	if (opt == NULL)
 		return -ENOBUFS;
-	memset(opt, 0, s);
 
 	memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
 	opt->index = p->index;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 47e00bd9625..da905d7b4b4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -196,10 +196,9 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 		return ret;
 	}
 
-	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (p == NULL)
 		return -ENOMEM;
-	memset(p, 0, sizeof(*p));
 
 	ret = ACT_P_CREATED;
 	p->refcnt = 1;
@@ -429,11 +428,10 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 		return p;
 	}
 
-	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (p == NULL)
 		return NULL;
 
-	memset(p, 0, sizeof(*p));
 	p->refcnt = 1;
 	spin_lock_init(&p->lock);
 	p->stats_lock = &p->lock;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 61507f006b1..86cac49a053 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -178,19 +178,17 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 
 	err = -ENOBUFS;
 	if (head == NULL) {
-		head = kmalloc(sizeof(*head), GFP_KERNEL);
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
 		if (head == NULL)
 			goto errout;
 
-		memset(head, 0, sizeof(*head));
 		INIT_LIST_HEAD(&head->flist);
 		tp->root = head;
 	}
 
-	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	f = kzalloc(sizeof(*f), GFP_KERNEL);
 	if (f == NULL)
 		goto errout;
-	memset(f, 0, sizeof(*f));
 
 	err = -EINVAL;
 	if (handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d41de91fc4f..e6973d9b686 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -267,20 +267,18 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 		return -EINVAL;
 
 	if (head == NULL) {
-		head = kmalloc(sizeof(struct fw_head), GFP_KERNEL);
+		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
 		if (head == NULL)
 			return -ENOBUFS;
-		memset(head, 0, sizeof(*head));
 
 		tcf_tree_lock(tp);
 		tp->root = head;
 		tcf_tree_unlock(tp);
 	}
 
-	f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL);
+	f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
 	if (f == NULL)
 		return -ENOBUFS;
-	memset(f, 0, sizeof(*f));
 
 	f->id = handle;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index c2e71900f7b..d3aea730d4c 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -396,10 +396,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	h1 = to_hash(nhandle);
 	if ((b = head->table[h1]) == NULL) {
 		err = -ENOBUFS;
-		b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+		b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
 		if (b == NULL)
 			goto errout;
-		memset(b, 0, sizeof(*b));
 
 		tcf_tree_lock(tp);
 		head->table[h1] = b;
@@ -475,20 +474,18 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 
 	err = -ENOBUFS;
 	if (head == NULL) {
-		head = kmalloc(sizeof(struct route4_head), GFP_KERNEL);
+		head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
 		if (head == NULL)
 			goto errout;
-		memset(head, 0, sizeof(struct route4_head));
 
 		tcf_tree_lock(tp);
 		tp->root = head;
 		tcf_tree_unlock(tp);
 	}
 
-	f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL);
+	f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
 	if (f == NULL)
 		goto errout;
-	memset(f, 0, sizeof(*f));
 
 	err = route4_set_parms(tp, base, f, handle, head, tb,
 		tca[TCA_RATE-1], 1);
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index ba874197162..6e230ecfba0 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -240,9 +240,8 @@ static int rsvp_init(struct tcf_proto *tp)
 {
 	struct rsvp_head *data;
 
-	data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
+	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 	if (data) {
-		memset(data, 0, sizeof(struct rsvp_head));
 		tp->root = data;
 		return 0;
 	}
@@ -446,11 +445,10 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 		goto errout2;
 
 	err = -ENOBUFS;
-	f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
+	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 	if (f == NULL)
 		goto errout2;
 
-	memset(f, 0, sizeof(*f));
 	h2 = 16;
 	if (tb[TCA_RSVP_SRC-1]) {
 		err = -EINVAL;
@@ -532,10 +530,9 @@ insert:
 	/* No session found. Create new one. */
 
 	err = -ENOBUFS;
-	s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
+	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 	if (s == NULL)
 		goto errout;
-	memset(s, 0, sizeof(*s));
 	memcpy(s->dst, dst, sizeof(s->dst));
 
 	if (pinfo) {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7870e7bb0ba..5af8a59e150 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -148,11 +148,10 @@ static int tcindex_init(struct tcf_proto *tp)
 	struct tcindex_data *p;
 
 	DPRINTK("tcindex_init(tp %p)\n",tp);
-	p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
+	p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
-	memset(p, 0, sizeof(*p));
 	p->mask = 0xffff;
 	p->hash = DEFAULT_HASH_SIZE;
 	p->fall_through = 1;
@@ -296,16 +295,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	err = -ENOMEM;
 	if (!cp.perfect && !cp.h) {
 		if (valid_perfect_hash(&cp)) {
-			cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL);
+			cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
 			if (!cp.perfect)
 				goto errout;
-			memset(cp.perfect, 0, cp.hash * sizeof(*r));
 			balloc = 1;
 		} else {
-			cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL);
+			cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
 			if (!cp.h)
 				goto errout;
-			memset(cp.h, 0, cp.hash * sizeof(f));
 			balloc = 2;
 		}
 	}
@@ -316,10 +313,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
 
 	if (r == &new_filter_result) {
-		f = kmalloc(sizeof(*f), GFP_KERNEL);
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
 		if (!f)
 			goto errout_alloc;
-		memset(f, 0, sizeof(*f));
  	}
 
 	if (tb[TCA_TCINDEX_CLASSID-1]) {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d712edcd1bc..eea36696674 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -307,23 +307,21 @@ static int u32_init(struct tcf_proto *tp)
 		if (tp_c->q == tp->q)
 			break;
 
-	root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
+	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 	if (root_ht == NULL)
 		return -ENOBUFS;
 
-	memset(root_ht, 0, sizeof(*root_ht));
 	root_ht->divisor = 0;
 	root_ht->refcnt++;
 	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
 	root_ht->prio = tp->prio;
 
 	if (tp_c == NULL) {
-		tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
+		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
 		if (tp_c == NULL) {
 			kfree(root_ht);
 			return -ENOBUFS;
 		}
-		memset(tp_c, 0, sizeof(*tp_c));
 		tp_c->q = tp->q;
 		tp_c->next = u32_list;
 		u32_list = tp_c;
@@ -571,10 +569,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 			if (handle == 0)
 				return -ENOMEM;
 		}
-		ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
 		if (ht == NULL)
 			return -ENOBUFS;
-		memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
 		ht->tp_c = tp_c;
 		ht->refcnt = 0;
 		ht->divisor = divisor;
@@ -617,18 +614,16 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 
 	s = RTA_DATA(tb[TCA_U32_SEL-1]);
 
-	n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
 	if (n == NULL)
 		return -ENOBUFS;
 
-	memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
 #ifdef CONFIG_CLS_U32_PERF
-	n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
+	n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
 	if (n->pf == NULL) {
 		kfree(n);
 		return -ENOBUFS;
 	}
-	memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64));
 #endif
 
 	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 698372954f4..61e3b740ab1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -773,10 +773,9 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
 	    TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
 		goto errout;
 
-	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	meta = kzalloc(sizeof(*meta), GFP_KERNEL);
 	if (meta == NULL)
 		goto errout;
-	memset(meta, 0, sizeof(*meta));
 
 	memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
 	memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 2405a86093a..0fd0768a17c 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -321,10 +321,9 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
 	list_len = RTA_PAYLOAD(rt_list);
 	matches_len = tree_hdr->nmatches * sizeof(*em);
 
-	tree->matches = kmalloc(matches_len, GFP_KERNEL);
+	tree->matches = kzalloc(matches_len, GFP_KERNEL);
 	if (tree->matches == NULL)
 		goto errout;
-	memset(tree->matches, 0, matches_len);
 
 	/* We do not use rtattr_parse_nested here because the maximum
 	 * number of attributes is unknown. This saves us the allocation
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
index 5d3ae03e22a..0ebc98e9be2 100644
--- a/net/sched/estimator.c
+++ b/net/sched/estimator.c
@@ -139,11 +139,10 @@ int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct r
 	if (parm->interval < -2 || parm->interval > 3)
 		return -EINVAL;
 
-	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	est = kzalloc(sizeof(*est), GFP_KERNEL);
 	if (est == NULL)
 		return -ENOBUFS;
 
-	memset(est, 0, sizeof(*est));
 	est->interval = parm->interval + 2;
 	est->stats = stats;
 	est->stats_lock = stats_lock;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 80b7f6a8d00..bac881bfe36 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1926,10 +1926,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
 	}
 
 	err = -ENOBUFS;
-	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
 	if (cl == NULL)
 		goto failure;
-	memset(cl, 0, sizeof(*cl));
 	cl->R_tab = rtab;
 	rtab = NULL;
 	cl->refcnt = 1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d735f51686a..0834c2ee917 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -432,10 +432,9 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 	size = QDISC_ALIGN(sizeof(*sch));
 	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
-	p = kmalloc(size, GFP_KERNEL);
+	p = kzalloc(size, GFP_KERNEL);
 	if (!p)
 		goto errout;
-	memset(p, 0, size);
 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 	sch->padded = (char *) sch - (char *) p;
 
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 0cafdd5feb1..18e81a8ffb0 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -406,10 +406,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	struct gred_sched_data *q;
 
 	if (table->tab[dp] == NULL) {
-		table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
+		table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
 		if (table->tab[dp] == NULL)
 			return -ENOMEM;
-		memset(table->tab[dp], 0, sizeof(*q));
 	}
 
 	q = table->tab[dp];
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6b1b4a981e8..6a6735a2ed3 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1123,10 +1123,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (rsc == NULL && fsc == NULL)
 		return -EINVAL;
 
-	cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
+	cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL);
 	if (cl == NULL)
 		return -ENOBUFS;
-	memset(cl, 0, sizeof(struct hfsc_class));
 
 	if (rsc != NULL)
 		hfsc_change_rsc(cl, rsc, 0);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index cc5f339e6f9..880a3394a51 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1559,10 +1559,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			goto failure;
 		}
 		err = -ENOBUFS;
-		if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
 		
-		memset(cl, 0, sizeof(*cl));
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
 		INIT_LIST_HEAD(&cl->hlist);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 519ebc17c02..4a9aa9393b9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -225,9 +225,8 @@ gss_alloc_context(void)
 {
 	struct gss_cl_ctx *ctx;
 
-	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx != NULL) {
-		memset(ctx, 0, sizeof(*ctx));
 		ctx->gc_proc = RPC_GSS_PROC_DATA;
 		ctx->gc_seq = 1;	/* NetApp 6.4R1 doesn't accept seq. no. 0 */
 		spin_lock_init(&ctx->gc_seq_lock);
@@ -391,9 +390,8 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
 {
 	struct gss_upcall_msg *gss_msg;
 
-	gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL);
+	gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
 	if (gss_msg != NULL) {
-		memset(gss_msg, 0, sizeof(*gss_msg));
 		INIT_LIST_HEAD(&gss_msg->list);
 		rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
 		init_waitqueue_head(&gss_msg->waitqueue);
@@ -776,10 +774,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	dprintk("RPC:      gss_create_cred for uid %d, flavor %d\n",
 		acred->uid, auth->au_flavor);
 
-	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+	if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
 		goto out_err;
 
-	memset(cred, 0, sizeof(*cred));
 	atomic_set(&cred->gc_count, 1);
 	cred->gc_uid = acred->uid;
 	/*
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index b8714a87b34..70e1e53a632 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -129,9 +129,8 @@ gss_import_sec_context_kerberos(const void *p,
 	const void *end = (const void *)((const char *)p + len);
 	struct	krb5_ctx *ctx;
 
-	if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
 		goto out_err;
-	memset(ctx, 0, sizeof(*ctx));
 
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
 	if (IS_ERR(p))
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index d88468d21c3..3db745379d0 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -237,9 +237,8 @@ gss_import_sec_context(const void *input_token, size_t bufsize,
 		       struct gss_api_mech	*mech,
 		       struct gss_ctx		**ctx_id)
 {
-	if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL)))
+	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
 		return GSS_S_FAILURE;
-	memset(*ctx_id, 0, sizeof(**ctx_id));
 	(*ctx_id)->mech_type = gss_mech_get(mech);
 
 	return mech->gm_ops
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 3d0432aa45c..88dcb52d171 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -152,9 +152,8 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
 	const void *end = (const void *)((const char *)p + len);
 	struct	spkm3_ctx *ctx;
 
-	if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
 		goto out_err;
-	memset(ctx, 0, sizeof(*ctx));
 
 	p = simple_get_netobj(p, end, &ctx->ctx_id);
 	if (IS_ERR(p))
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index af0d7ce7468..854a983ccf2 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -90,10 +90,9 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
 int
 decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
 {
-	if (!(out->data = kmalloc(explen,GFP_KERNEL)))
+	if (!(out->data = kzalloc(explen,GFP_KERNEL)))
 		return 0;
 	out->len = explen;
-	memset(out->data, 0, explen);
 	memcpy(out->data, in, enclen);
 	return 1;
 }
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index aa8965e9d30..4ba271f892c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -125,10 +125,9 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 		goto out_err;
 
 	err = -ENOMEM;
-	clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
+	clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
 	if (!clnt)
 		goto out_err;
-	memset(clnt, 0, sizeof(*clnt));
 	atomic_set(&clnt->cl_users, 0);
 	atomic_set(&clnt->cl_count, 1);
 	clnt->cl_parent = clnt;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 15c2db26767..bd98124c3a6 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -114,13 +114,8 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
  */
 struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
 {
-	unsigned int ops = clnt->cl_maxproc;
-	size_t size = ops * sizeof(struct rpc_iostats);
 	struct rpc_iostats *new;
-
-	new = kmalloc(size, GFP_KERNEL);
-	if (new)
-		memset(new, 0 , size);
+	new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
 	return new;
 }
 EXPORT_SYMBOL(rpc_alloc_iostats);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01ba60a4957..b76a227dd3a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,9 +32,8 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
 	int vers;
 	unsigned int xdrsize;
 
-	if (!(serv = kmalloc(sizeof(*serv), GFP_KERNEL)))
+	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
-	memset(serv, 0, sizeof(*serv));
 	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
 	serv->sv_nrthreads = 1;
@@ -159,11 +158,10 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
 	struct svc_rqst	*rqstp;
 	int		error = -ENOMEM;
 
-	rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
+	rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
 	if (!rqstp)
 		goto out;
 
-	memset(rqstp, 0, sizeof(*rqstp));
 	init_waitqueue_head(&rqstp->rq_wait);
 
 	if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a27905a0ad2..d9a95732df4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1322,11 +1322,10 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
 	struct sock	*inet;
 
 	dprintk("svc: svc_setup_socket %p\n", sock);
-	if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
+	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
 		*errp = -ENOMEM;
 		return NULL;
 	}
-	memset(svsk, 0, sizeof(*svsk));
 
 	inet = sock->sk;
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 02060d0e7be..313b68d892c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -908,9 +908,8 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
 	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
 
-	if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
+	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
 		return ERR_PTR(-ENOMEM);
-	memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
 
 	xprt->addr = *ap;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 21006b10910..ee678ed13b6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1276,10 +1276,9 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
 	xprt->max_reqs = xprt_udp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
-	xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
+	xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
 	if (xprt->slot == NULL)
 		return -ENOMEM;
-	memset(xprt->slot, 0, slot_table_size);
 
 	xprt->prot = IPPROTO_UDP;
 	xprt->port = xs_get_random_port();
@@ -1318,10 +1317,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
 	xprt->max_reqs = xprt_tcp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
-	xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
+	xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
 	if (xprt->slot == NULL)
 		return -ENOMEM;
-	memset(xprt->slot, 0, slot_table_size);
 
 	xprt->prot = IPPROTO_TCP;
 	xprt->port = xs_get_random_port();
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 7ef17a449cf..75a5968c213 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -665,11 +665,9 @@ int tipc_bearer_init(void)
 	int res;
 
 	write_lock_bh(&tipc_net_lock);
-	tipc_bearers = kmalloc(MAX_BEARERS * sizeof(struct bearer), GFP_ATOMIC);
-	media_list = kmalloc(MAX_MEDIA * sizeof(struct media), GFP_ATOMIC);
+	tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
+	media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
 	if (tipc_bearers && media_list) {
-		memset(tipc_bearers, 0, MAX_BEARERS * sizeof(struct bearer));
-		memset(media_list, 0, MAX_MEDIA * sizeof(struct media));
 		res = TIPC_OK;
 	} else {
 		kfree(tipc_bearers);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 1dcb6940e33..b46b5188a9f 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -57,29 +57,25 @@ struct cluster *tipc_cltr_create(u32 addr)
 	struct _zone *z_ptr;
 	struct cluster *c_ptr;
 	int max_nodes; 
-	int alloc;
 
-	c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC);
+	c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC);
 	if (c_ptr == NULL) {
 		warn("Cluster creation failure, no memory\n");
 		return NULL;
 	}
-	memset(c_ptr, 0, sizeof(*c_ptr));
 
 	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
 	if (in_own_cluster(addr))
 		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
 	else
 		max_nodes = tipc_max_nodes + 1;
-	alloc = sizeof(void *) * (max_nodes + 1);
 
-	c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC);
+	c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
 	if (c_ptr->nodes == NULL) {
 		warn("Cluster creation failure, no memory for node area\n");
 		kfree(c_ptr);
 		return NULL;
 	}
-	memset(c_ptr->nodes, 0, alloc);
 
 	if (in_own_cluster(addr))
 		tipc_local_nodes = c_ptr->nodes;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index c10e18a49b9..693f02eca6d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -417,12 +417,11 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 	struct tipc_msg *msg;
 	char *if_name;
 
-	l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC);
+	l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
 	if (!l_ptr) {
 		warn("Link creation failed, no memory\n");
 		return NULL;
 	}
-	memset(l_ptr, 0, sizeof(*l_ptr));
 
 	l_ptr->addr = peer;
 	if_name = strchr(b_ptr->publ.name, ':') + 1;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index a6926ff07bc..049242ea5c3 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -117,14 +117,12 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 				       u32 scope, u32 node, u32 port_ref,   
 				       u32 key)
 {
-	struct publication *publ =
-		(struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC);
+	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
 	if (publ == NULL) {
 		warn("Publication creation failure, no memory\n");
 		return NULL;
 	}
 
-	memset(publ, 0, sizeof(*publ));
 	publ->type = type;
 	publ->lower = lower;
 	publ->upper = upper;
@@ -144,11 +142,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 
 static struct sub_seq *tipc_subseq_alloc(u32 cnt)
 {
-	u32 sz = cnt * sizeof(struct sub_seq);
-	struct sub_seq *sseq = (struct sub_seq *)kmalloc(sz, GFP_ATOMIC);
-
-	if (sseq)
-		memset(sseq, 0, sz);
+	struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
 	return sseq;
 }
 
@@ -160,8 +154,7 @@ static struct sub_seq *tipc_subseq_alloc(u32 cnt)
 
 static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
 {
-	struct name_seq *nseq = 
-		(struct name_seq *)kmalloc(sizeof(*nseq), GFP_ATOMIC);
+	struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC);
 	struct sub_seq *sseq = tipc_subseq_alloc(1);
 
 	if (!nseq || !sseq) {
@@ -171,7 +164,6 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
 		return NULL;
 	}
 
-	memset(nseq, 0, sizeof(*nseq));
 	spin_lock_init(&nseq->lock);
 	nseq->type = type;
 	nseq->sseqs = sseq;
@@ -1060,7 +1052,7 @@ int tipc_nametbl_init(void)
 {
 	int array_size = sizeof(struct hlist_head) * tipc_nametbl_size;
 
-	table.types = (struct hlist_head *)kmalloc(array_size, GFP_ATOMIC);
+	table.types = kmalloc(array_size, GFP_ATOMIC);
 	if (!table.types)
 		return -ENOMEM;
 
diff --git a/net/tipc/net.c b/net/tipc/net.c
index e5a359ab493..a991bf8a7f7 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -160,14 +160,11 @@ void tipc_net_send_external_routes(u32 dest)
 
 static int net_init(void)
 {
-	u32 sz = sizeof(struct _zone *) * (tipc_max_zones + 1);
-
 	memset(&tipc_net, 0, sizeof(tipc_net));
-	tipc_net.zones = (struct _zone **)kmalloc(sz, GFP_ATOMIC);
+	tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC);
 	if (!tipc_net.zones) {
 		return -ENOMEM;
 	}
-	memset(tipc_net.zones, 0, sz);
 	return TIPC_OK;
 }
 
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 3251c8d8e53..b9c8c6b9e94 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -226,12 +226,11 @@ u32 tipc_createport_raw(void *usr_handle,
 	struct tipc_msg *msg;
 	u32 ref;
 
-	p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC);
+	p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
 	if (!p_ptr) {
 		warn("Port creation failed, no memory\n");
 		return 0;
 	}
-	memset(p_ptr, 0, sizeof(*p_ptr));
 	ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
 	if (!ref) {
 		warn("Port creation failed, reference table exhausted\n");
@@ -1058,7 +1057,7 @@ int tipc_createport(u32 user_ref,
 	struct port *p_ptr; 
 	u32 ref;
 
-	up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
+	up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
 	if (!up_ptr) {
 		warn("Port creation failed, no memory\n");
 		return -ENOMEM;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index e19b4bcd67e..c51600ba5f4 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -393,12 +393,11 @@ static void subscr_named_msg_event(void *usr_handle,
 
 	/* Create subscriber object */
 
-	subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC);
+	subscriber = kzalloc(sizeof(struct subscriber), GFP_ATOMIC);
 	if (subscriber == NULL) {
 		warn("Subscriber rejected, no memory\n");
 		return;
 	}
-	memset(subscriber, 0, sizeof(struct subscriber));
 	INIT_LIST_HEAD(&subscriber->subscription_list);
 	INIT_LIST_HEAD(&subscriber->subscriber_list);
 	subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock);
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
index 1e3ae57c722..04d1b9be9c5 100644
--- a/net/tipc/user_reg.c
+++ b/net/tipc/user_reg.c
@@ -82,9 +82,8 @@ static int reg_init(void)
 	
 	spin_lock_bh(&reg_lock);
 	if (!users) {
-		users = (struct tipc_user *)kmalloc(USER_LIST_SIZE, GFP_ATOMIC);
+		users = kzalloc(USER_LIST_SIZE, GFP_ATOMIC);
 		if (users) {
-			memset(users, 0, USER_LIST_SIZE);
 			for (i = 1; i <= MAX_USERID; i++) {
 				users[i].next = i - 1;
 			}
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 316c4872ff5..f5b00ea2d5a 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -52,13 +52,12 @@ struct _zone *tipc_zone_create(u32 addr)
 		return NULL;
 	}
 
-	z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC);
+	z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC);
 	if (!z_ptr) {
 		warn("Zone creation failed, insufficient memory\n");
 		return NULL;
 	}
 
-	memset(z_ptr, 0, sizeof(*z_ptr));
 	z_num = tipc_zone(addr);
 	z_ptr->addr = tipc_addr(z_num, 0, 0);
 	tipc_net.zones[z_num] = z_ptr;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f70475bfb62..6f290927926 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -663,11 +663,10 @@ static int unix_autobind(struct socket *sock)
 		goto out;
 
 	err = -ENOMEM;
-	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
+	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 	if (!addr)
 		goto out;
 
-	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
 	addr->name->sun_family = AF_UNIX;
 	atomic_set(&addr->refcnt, 1);
 
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index a690cf773b6..6f39faa1583 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -370,12 +370,11 @@ static int wanpipe_listen_rcv (struct sk_buff *skb,  struct sock *sk)
          * used by the ioctl call to read call information
          * and to execute commands. 
          */	
-	if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
+	if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
 		wanpipe_kill_sock_irq (newsk);
 		release_device(dev);		
 		return -ENOMEM;
 	}
-	memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
 	memcpy(mbox_ptr,skb->data,skb->len);
 
 	/* Register the lcn on which incoming call came
@@ -507,11 +506,10 @@ static struct sock *wanpipe_alloc_socket(void)
 	if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL)
 		return NULL;
 
-	if ((wan_opt = kmalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
+	if ((wan_opt = kzalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
 		sk_free(sk);
 		return NULL;
 	}
-	memset(wan_opt, 0x00, sizeof(struct wanpipe_opt));
 
 	wp_sk(sk) = wan_opt;
 
@@ -2011,10 +2009,9 @@ static int set_ioctl_cmd (struct sock *sk, void *arg)
 
 		dev_put(dev);
 		
-		if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
+		if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
 			return -ENOMEM;
 
-		memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
 		wp_sk(sk)->mbox = mbox_ptr;
 
 		wanpipe_link_driver(dev,sk);
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index ad8e8a79779..9479659277a 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -642,18 +642,16 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
 
 	if (cnf->config_id == WANCONFIG_MPPP) {
 #ifdef CONFIG_WANPIPE_MULTPPP
-		pppdev = kmalloc(sizeof(struct ppp_device), GFP_KERNEL);
+		pppdev = kzalloc(sizeof(struct ppp_device), GFP_KERNEL);
 		err = -ENOBUFS;
 		if (pppdev == NULL)
 			goto out;
-		memset(pppdev, 0, sizeof(struct ppp_device));
-		pppdev->dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+		pppdev->dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
 		if (pppdev->dev == NULL) {
 			kfree(pppdev);
 			err = -ENOBUFS;
 			goto out;
 		}
-		memset(pppdev->dev, 0, sizeof(struct net_device));
 		err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf);
 		dev = pppdev->dev;
 #else
@@ -663,11 +661,10 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
 		goto out;
 #endif
 	} else {
-		dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+		dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
 		err = -ENOBUFS;
 		if (dev == NULL)
 			goto out;
-		memset(dev, 0, sizeof(struct net_device));
 		err = wandev->new_if(wandev, dev, cnf);
 	}
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 405b741dff4..f35bc676128 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -307,10 +307,9 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 {
 	struct xfrm_policy *policy;
 
-	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
 	if (policy) {
-		memset(policy, 0, sizeof(struct xfrm_policy));
 		atomic_set(&policy->refcnt, 1);
 		rwlock_init(&policy->lock);
 		init_timer(&policy->timer);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 43f00fc28a3..0021aad5db4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -194,10 +194,9 @@ struct xfrm_state *xfrm_state_alloc(void)
 {
 	struct xfrm_state *x;
 
-	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
 
 	if (x) {
-		memset(x, 0, sizeof(struct xfrm_state));
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->bydst);
-- 
cgit v1.2.3-70-g09d2


From 8265abc082d2283b4ef20237efadb71c6f16ed0c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 21 Jul 2006 15:09:55 -0700
Subject: [IPV4]: Fix nexthop realm dumping for multipath routes

Routing realms exist per nexthop, but are only returned to userspace
for the first nexthop. This is due to the fact that iproute2 only
allows to set the realm for the first nexthop and the kernel refuses
multipath routes where only a single realm is present.

Dump all realms for multipath routes to enable iproute to correctly
display them.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 63864bb846e..9be53a8e72c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -961,10 +961,6 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_protocol = fi->fib_protocol;
 	if (fi->fib_priority)
 		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
-#ifdef CONFIG_NET_CLS_ROUTE
-	if (fi->fib_nh[0].nh_tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
-#endif
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
 		goto rtattr_failure;
 	if (fi->fib_prefsrc)
@@ -974,6 +970,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
 		if (fi->fib_nh->nh_oif)
 			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+#ifdef CONFIG_NET_CLS_ROUTE
+		if (fi->fib_nh[0].nh_tclassid)
+			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+#endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
@@ -992,6 +992,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			nhp->rtnh_ifindex = nh->nh_oif;
 			if (nh->nh_gw)
 				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+#ifdef CONFIG_NET_CLS_ROUTE
+			if (nh->nh_tclassid)
+				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+#endif
 			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
 		} endfor_nexthops(fi);
 		mp_head->rta_type = RTA_MULTIPATH;
-- 
cgit v1.2.3-70-g09d2


From 083edca05ab1fa6efac1ba414018f7f45a4a83ff Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 24 Jul 2006 22:52:10 -0700
Subject: [NETFILTER]: H.323 helper: fix possible NULL-ptr dereference

An RCF message containing a timeout results in a NULL-ptr dereference if
no RRQ has been seen before.

Noticed by the "SATURN tool", reported by Thomas Dillig <tdillig@stanford.edu>
and Isil Dillig <isil@stanford.edu>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_h323.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index af35235672d..9a39e296971 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -1200,7 +1200,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
 	tuple.dst.protonum = IPPROTO_TCP;
 
 	exp = __ip_conntrack_expect_find(&tuple);
-	if (exp->master == ct)
+	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
 }
-- 
cgit v1.2.3-70-g09d2


From 72b558235950538da8bf5a8de746a194831c6fe6 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 24 Jul 2006 22:53:12 -0700
Subject: [NETFILTER]: conntrack: fix SYSCTL=n compile

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_standalone.c | 4 ++--
 net/netfilter/nf_conntrack_standalone.c      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7bd3c22003a..7a9fa04a467 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -534,6 +534,8 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
 
 /* Sysctl support */
 
+int ip_conntrack_checksum = 1;
+
 #ifdef CONFIG_SYSCTL
 
 /* From ip_conntrack_core.c */
@@ -568,8 +570,6 @@ extern unsigned int ip_ct_generic_timeout;
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-int ip_conntrack_checksum = 1;
-
 static struct ctl_table_header *ip_ct_sysctl_header;
 
 static ctl_table ip_ct_sysctl_table[] = {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5fcab2ef231..4ef83669996 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -428,6 +428,8 @@ static struct file_operations ct_cpu_seq_fops = {
 
 /* Sysctl support */
 
+int nf_conntrack_checksum = 1;
+
 #ifdef CONFIG_SYSCTL
 
 /* From nf_conntrack_core.c */
@@ -459,8 +461,6 @@ extern unsigned int nf_ct_generic_timeout;
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-int nf_conntrack_checksum = 1;
-
 static struct ctl_table_header *nf_ct_sysctl_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
-- 
cgit v1.2.3-70-g09d2


From 8cf8fb5687bb37737ea419a0b2143aab49295779 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 24 Jul 2006 22:53:35 -0700
Subject: [NETFILTER]: SNMP NAT: fix byteorder confusion

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_nat_snmp_basic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 0b1b416759c..18b7fbdccb6 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1255,9 +1255,9 @@ static int help(struct sk_buff **pskb,
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
-	if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+	if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
 		return NF_ACCEPT;
-	if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+	if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
 	/* No NAT? */
-- 
cgit v1.2.3-70-g09d2


From d569f1d72f068992d07ab17f7ff9aea7f0d97cdb Mon Sep 17 00:00:00 2001
From: Guillaume Chazarain <guichaz@yahoo.fr>
Date: Mon, 24 Jul 2006 23:45:16 -0700
Subject: [IPV4]: Clear the whole IPCB, this clears also IPCB(skb)->flags.

Signed-off-by: Guillaume Chazarain <guichaz@yahoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 184c78ca79e..212734ca238 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -429,7 +429,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	}
 
 	/* Remove any debris in the socket control block */
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 
 	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
-- 
cgit v1.2.3-70-g09d2


From 722874909271a807b243a797c2958e0a12992964 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Tue, 25 Jul 2006 16:45:12 -0700
Subject: [IPV4] ipmr: ip multicast route bug fix.

IP multicast route code was reusing an skb which causes use after free
and double free.

From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>

Note, it is real skb_clone(), not alloc_skb(). Equeued skb contains
the whole half-prepared netlink message plus room for the rest.
It could be also skb_copy(), if we want to be puristic about mangling
cloned data, but original copy is really not going to be used.

Acked-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9ccacf57f08..85893eef6b1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1578,6 +1578,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
 
 	if (cache==NULL) {
+		struct sk_buff *skb2;
 		struct net_device *dev;
 		int vif;
 
@@ -1591,12 +1592,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 			read_unlock(&mrt_lock);
 			return -ENODEV;
 		}
-		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
-		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
-		skb->nh.iph->saddr = rt->rt_src;
-		skb->nh.iph->daddr = rt->rt_dst;
-		skb->nh.iph->version = 0;
-		err = ipmr_cache_unresolved(vif, skb);
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb2) {
+			read_unlock(&mrt_lock);
+			return -ENOMEM;
+		}
+
+		skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
+		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
+		skb2->nh.iph->saddr = rt->rt_src;
+		skb2->nh.iph->daddr = rt->rt_dst;
+		skb2->nh.iph->version = 0;
+		err = ipmr_cache_unresolved(vif, skb2);
 		read_unlock(&mrt_lock);
 		return err;
 	}
-- 
cgit v1.2.3-70-g09d2


From f59fc7f30b710d45aadf715460b3e60dbe9d3418 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <from-linux-kernel@i-love.sakura.ne.jp>
Date: Tue, 25 Jul 2006 17:05:35 -0700
Subject: [IPV4/IPV6]: Setting 0 for unused port field in RAW IP recvmsg().

From: Tetsuo Handa from-linux-kernel@i-love.sakura.ne.jp

The recvmsg() for raw socket seems to return random u16 value
from the kernel stack memory since port field is not initialized.
But I'm not sure this patch is correct.
Does raw socket return any information stored in port field?

[ BSD defines RAW IP recvmsg to return a sin_port value of zero.
  This is described in Steven's TCP/IP Illustrated Volume 2 on
  page 1055, which is discussing the BSD rip_input() implementation. ]

Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 1 +
 net/ipv6/raw.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bd221ec3f81..62b2762a242 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -609,6 +609,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (sin) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
 	if (inet->cmsg_flags)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fa1ce0ae123..d57e61ce4a7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -411,6 +411,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	/* Copy the address. */
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
 		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
-- 
cgit v1.2.3-70-g09d2


From 118075b3cdc90e0815362365f3fc64d672ace0d6 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Sun, 30 Jul 2006 20:21:45 -0700
Subject: [TCP]: fix memory leak in net/ipv4/tcp_probe.c::tcpprobe_read()

Based upon a patch by Jesper Juhl.

Signed-off-by: James Morris <jmorris@namei.org>
Acked-by: Stephen Hemminger <shemminger@osdl.org>
Acked-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d7d517a3a23..b3435324b57 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file)
 static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 			     size_t len, loff_t *ppos)
 {
-	int error = 0, cnt;
+	int error = 0, cnt = 0;
 	unsigned char *tbuf;
 
 	if (!buf || len < 0)
-- 
cgit v1.2.3-70-g09d2


From 3687b1dc6fe83a500ba4d3235704594f6a111a2d Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@nanjing-fnst.com>
Date: Sun, 30 Jul 2006 20:35:54 -0700
Subject: [TCP]: SNMPv2 tcpAttemptFails counter error

Refer to RFC2012, tcpAttemptFails is defined as following:
  tcpAttemptFails OBJECT-TYPE
      SYNTAX      Counter32
      MAX-ACCESS  read-only
      STATUS      current
      DESCRIPTION
              "The number of times TCP connections have made a direct
              transition to the CLOSED state from either the SYN-SENT
              state or the SYN-RCVD state, plus the number of times TCP
              connections have made a direct transition to the LISTEN
              state from the SYN-RCVD state."
      ::= { tcp 7 }

When I lookup into RFC793, I found that the state change should occured
under following condition:
  1. SYN-SENT -> CLOSED
     a) Received ACK,RST segment when SYN-SENT state.

  2. SYN-RCVD -> CLOSED
     b) Received SYN segment when SYN-RCVD state(came from LISTEN).
     c) Received RST segment when SYN-RCVD state(came from SYN-SENT).
     d) Received SYN segment when SYN-RCVD state(came from SYN-SENT).

  3. SYN-RCVD -> LISTEN
     e) Received RST segment when SYN-RCVD state(came from LISTEN).

In my test, those direct state transition can not be counted to
tcpAttemptFails.

Signed-off-by: Wei Yongjun <yjwei@nanjing-fnst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        | 3 +++
 net/ipv4/tcp_ipv4.c      | 2 --
 net/ipv4/tcp_minisocks.c | 4 +++-
 net/ipv6/tcp_ipv6.c      | 2 --
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0720bddff1e..7a093d0aa0f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -914,6 +914,9 @@ static inline void tcp_set_state(struct sock *sk, int state)
 
 static inline void tcp_done(struct sock *sk)
 {
+	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
+		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f6f39e81429..4b04c3edd4a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 			       It can f.e. if SYNs crossed.
 			     */
 		if (!sock_owned_by_user(sk)) {
-			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 			sk->sk_err = err;
 
 			sk->sk_error_report(sk);
@@ -874,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 drop_and_free:
 	reqsk_free(req);
 drop:
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 	return 0;
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ccb7cb22b1..624e2b2c7f5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		/* RFC793: "second check the RST bit" and
 		 *	   "fourth, check the SYN bit"
 		 */
-		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
+			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 			goto embryonic_reset;
+		}
 
 		/* ACK sequence verified above, just make sure ACK is
 		 * set.  If ACK not set, just silently drop the packet.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b76fd7fba5f..b843a650be7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -427,7 +427,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	case TCP_SYN_RECV:  /* Cannot happen.
 			       It can, it SYNs are crossed. --ANK */ 
 		if (!sock_owned_by_user(sk)) {
-			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 			sk->sk_err = err;
 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 
@@ -831,7 +830,6 @@ drop:
 	if (req)
 		reqsk_free(req);
 
-	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 	return 0; /* don't send reset */
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8d71740c56a9058acc4378504a356d543ff1308b Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Sun, 30 Jul 2006 20:43:36 -0700
Subject: [NET]: Core net changes to generate netevents

Generate netevents for:
- neighbour changes
- routing redirects
- pmtu changes

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/Makefile    |  2 +-
 net/core/neighbour.c | 14 ++++++++------
 net/ipv4/route.c     |  8 ++++++++
 net/ipv6/route.c     |  7 +++++++
 4 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/core/Makefile b/net/core/Makefile
index e9bd2467d5a..2645ba428d4 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
-obj-y		     += dev.o ethtool.o dev_mcast.o dst.o \
+obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7ad681f5e71..5130d2efdbb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -29,6 +29,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/sock.h>
+#include <net/netevent.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -754,6 +755,7 @@ static void neigh_timer_handler(unsigned long arg)
 			neigh->nud_state = NUD_STALE;
 			neigh->updated = jiffies;
 			neigh_suspect(neigh);
+			notify = 1;
 		}
 	} else if (state & NUD_DELAY) {
 		if (time_before_eq(now, 
@@ -762,6 +764,7 @@ static void neigh_timer_handler(unsigned long arg)
 			neigh->nud_state = NUD_REACHABLE;
 			neigh->updated = jiffies;
 			neigh_connect(neigh);
+			notify = 1;
 			next = neigh->confirmed + neigh->parms->reachable_time;
 		} else {
 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
@@ -819,6 +822,8 @@ static void neigh_timer_handler(unsigned long arg)
 out:
 		write_unlock(&neigh->lock);
 	}
+	if (notify)
+		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
 
 #ifdef CONFIG_ARPD
 	if (notify && neigh->parms->app_probes)
@@ -926,9 +931,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 {
 	u8 old;
 	int err;
-#ifdef CONFIG_ARPD
 	int notify = 0;
-#endif
 	struct net_device *dev;
 	int update_isrouter = 0;
 
@@ -948,9 +951,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			neigh_suspect(neigh);
 		neigh->nud_state = new;
 		err = 0;
-#ifdef CONFIG_ARPD
 		notify = old & NUD_VALID;
-#endif
 		goto out;
 	}
 
@@ -1022,9 +1023,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		if (!(new & NUD_CONNECTED))
 			neigh->confirmed = jiffies -
 				      (neigh->parms->base_reachable_time << 1);
-#ifdef CONFIG_ARPD
 		notify = 1;
-#endif
 	}
 	if (new == old)
 		goto out;
@@ -1056,6 +1055,9 @@ out:
 			(neigh->flags & ~NTF_ROUTER);
 	}
 	write_unlock_bh(&neigh->lock);
+
+	if (notify)
+		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
 #ifdef CONFIG_ARPD
 	if (notify && neigh->parms->app_probes)
 		neigh_app_notify(neigh);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2dc6dbb2846..19bd49d69d9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -104,6 +104,7 @@
 #include <net/icmp.h>
 #include <net/xfrm.h>
 #include <net/ip_mp_alg.h>
+#include <net/netevent.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 	struct rtable *rth, **rthp;
 	u32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
+	struct netevent_redirect netevent;
 
 	if (!in_dev)
 		return;
@@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 					rt_drop(rt);
 					goto do_next;
 				}
+				
+				netevent.old = &rth->u.dst;
+				netevent.new = &rt->u.dst;
+				call_netevent_notifiers(NETEVENT_REDIRECT, 
+						        &netevent);
 
 				rt_del(hash, rth);
 				if (!rt_intern_hash(hash, rt, &rt))
@@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 		}
 		dst->metrics[RTAX_MTU-1] = mtu;
 		dst_set_expires(dst, ip_rt_mtu_expires);
+		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 87c39c978cd..4b163711f3a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -53,6 +53,7 @@
 #include <linux/rtnetlink.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
+#include <net/netevent.h>
 
 #include <asm/uaccess.h>
 
@@ -742,6 +743,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 		}
 		dst->metrics[RTAX_MTU-1] = mtu;
+		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
 }
 
@@ -1155,6 +1157,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	struct rt6_info *rt, *nrt = NULL;
 	int strict;
 	struct fib6_node *fn;
+	struct netevent_redirect netevent;
 
 	/*
 	 * Get the "current" route for this destination and
@@ -1252,6 +1255,10 @@ restart:
 	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
 		goto out;
 
+	netevent.old = &rt->u.dst;
+	netevent.new = &nrt->u.dst;
+	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
+
 	if (rt->rt6i_flags&RTF_CACHE) {
 		ip6_del_rt(rt, NULL, NULL, NULL);
 		return;
-- 
cgit v1.2.3-70-g09d2


From 52499afe40387524e9f46ef9ce4695efccdd2ed9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 31 Jul 2006 22:32:09 -0700
Subject: [TCP]: Process linger2 timeout consistently.

Based upon guidance from Alexey Kuznetsov.

When linger2 is active, we check to see if the fin_wait2
timeout is longer than the timewait.  If it is, we schedule
the keepalive timer for the difference between the timewait
timeout and the fin_wait2 timeout.

When this orphan socket is seen by tcp_keepalive_timer()
it will try to transform this fin_wait2 socket into a
fin_wait2 mini-socket, again if linger2 is active.

Not all paths were setting this initial keepalive timer correctly.
The tcp input path was doing it correctly, but tcp_close() wasn't,
potentially making the socket linger longer than it really needs to.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f6a2d9223d0..7b621e44b12 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1659,7 +1659,8 @@ adjudge_to_death:
 			const int tmo = tcp_fin_time(sk);
 
 			if (tmo > TCP_TIMEWAIT_LEN) {
-				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
+				inet_csk_reset_keepalive_timer(sk,
+						tmo - TCP_TIMEWAIT_LEN);
 			} else {
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
-- 
cgit v1.2.3-70-g09d2


From b10866fd7dd9ae9b8dd03646d28702a76d624474 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 31 Jul 2006 23:46:18 -0700
Subject: [NETFILTER]: SIP helper: expect RTP streams in both directions

Since we don't know in which direction the first packet will arrive, we
need to create one expectation for each direction, which is currently
prevented by max_expected beeing set to 1.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_sip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index fc87ce0da40..4f222d6be00 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -442,7 +442,7 @@ static int __init init(void)
 		sip[i].tuple.src.u.udp.port = htons(ports[i]);
 		sip[i].mask.src.u.udp.port = 0xFFFF;
 		sip[i].mask.dst.protonum = 0xFF;
-		sip[i].max_expected = 1;
+		sip[i].max_expected = 2;
 		sip[i].timeout = 3 * 60; /* 3 minutes */
 		sip[i].me = THIS_MODULE;
 		sip[i].help = sip_help;
-- 
cgit v1.2.3-70-g09d2


From 3ab720881b6e36bd5190a3a11cee8d8d067c4ad7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 31 Jul 2006 23:47:31 -0700
Subject: [NETFILTER]: xt_hashlimit/xt_string: missing string validation

The hashlimit table name and the textsearch algorithm need to be
terminated, the textsearch pattern length must not exceed the
maximum size.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_hashlimit.c | 3 +++
 net/netfilter/xt_string.c          | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 92980ab8ce4..6b662449e82 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -508,6 +508,9 @@ hashlimit_checkentry(const char *tablename,
 	if (!r->cfg.expire)
 		return 0;
 
+	if (r->name[sizeof(r->name) - 1] != '\0')
+		return 0;
+
 	/* This is the best we've got: We cannot release and re-grab lock,
 	 * since checkentry() is called before ip_tables.c grabs ipt_mutex.  
 	 * We also cannot grab the hashtable spinlock, since htable_create will 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 0ebb6ac2c8c..d8e3891b5f8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -55,7 +55,10 @@ static int checkentry(const char *tablename,
 	/* Damn, can't handle this case properly with iptables... */
 	if (conf->from_offset > conf->to_offset)
 		return 0;
-
+	if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
+	    	return 0;
+	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
+		return 0;
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
 				     GFP_KERNEL, TS_AUTOLOAD);
 	if (IS_ERR(ts_conf))
-- 
cgit v1.2.3-70-g09d2


From dafee490858f79e144c5e6cdd84ceb9efa20a3f1 Mon Sep 17 00:00:00 2001
From: Wei Dong <weid@nanjing-fnst.com>
Date: Wed, 2 Aug 2006 13:41:21 -0700
Subject: [IPV6]: SNMPv2 "ipv6IfStatsOutFragCreates" counter error

  When I tested linux kernel 2.6.71.7 about statistics
"ipv6IfStatsOutFragCreates", and found that it couldn't increase
correctly. The criteria is RFC 2465:

  ipv6IfStatsOutFragCreates OBJECT-TYPE
      SYNTAX      Counter32
      MAX-ACCESS  read-only
      STATUS      current
      DESCRIPTION
         "The number of output datagram fragments that have
         been generated as a result of fragmentation at
         this output interface."
      ::= { ipv6IfStatsEntry 15 }

I think there are two issues in Linux kernel.
1st:
RFC2465 specifies the counter is "The number of output datagram
fragments...". I think increasing this counter after output a fragment
successfully is better. And it should not be increased even though a
fragment is created but failed to output.

2nd:
If we send a big ICMP/ICMPv6 echo request to a host, and receive
ICMP/ICMPv6 echo reply consisted of some fragments. As we know that in
Linux kernel first fragmentation occurs in ICMP layer(maybe saying
transport layer is better), but this is not the "real"
fragmentation,just do some "pre-fragment" -- allocate space for date,
and form a frag_list, etc. The "real" fragmentation happens in IP layer
-- set offset and MF flag and so on. So I think in "fast path" for
ip_fragment/ip6_fragment, if we send a fragment which "pre-fragment" by
upper layer we should also increase "ipv6IfStatsOutFragCreates".

Signed-off-by: Wei Dong <weid@nanjing-fnst.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 7 ++++---
 net/ipv6/ip6_output.c | 8 +++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7c9f9a6421b..9bf307a2978 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -526,6 +526,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 
 			err = output(skb);
 
+			if (!err)
+				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
 			if (err || !frag)
 				break;
 
@@ -649,9 +651,6 @@ slow_path:
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-
-		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
 		iph->tot_len = htons(len + hlen);
 
 		ip_send_check(iph);
@@ -659,6 +658,8 @@ slow_path:
 		err = output(skb2);
 		if (err)
 			goto fail;
+
+		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
 	}
 	kfree_skb(skb);
 	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 70c9234b70e..69451af6abe 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -596,6 +596,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			}
 			
 			err = output(skb);
+			if(!err)
+				IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+
 			if (err || !frag)
 				break;
 
@@ -707,12 +710,11 @@ slow_path:
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-
-		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
 		err = output(frag);
 		if (err)
 			goto fail;
+
+		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
 	}
 	kfree_skb(skb);
 	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
-- 
cgit v1.2.3-70-g09d2


From dc49c1f94e3469d94b952e8f5160dd4ccd791d79 Mon Sep 17 00:00:00 2001
From: Catherine Zhang <cxzhang@watson.ibm.com>
Date: Wed, 2 Aug 2006 14:12:06 -0700
Subject: [AF_UNIX]: Kernel memory leak fix for af_unix datagram getpeersec
 patch

From: Catherine Zhang <cxzhang@watson.ibm.com>

This patch implements a cleaner fix for the memory leak problem of the
original unix datagram getpeersec patch.  Instead of creating a
security context each time a unix datagram is sent, we only create the
security context when the receiver requests it.

This new design requires modification of the current
unix_getsecpeer_dgram LSM hook and addition of two new hooks, namely,
secid_to_secctx and release_secctx.  The former retrieves the security
context and the latter releases it.  A hook is required for releasing
the security context because it is up to the security module to decide
how that's done.  In the case of Selinux, it's a simple kfree
operation.

Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h | 41 +++++++++++++++++++++++++++++++++++------
 include/net/af_unix.h    |  6 ++----
 include/net/scm.h        | 29 +++++++++++++++++++++++++----
 net/ipv4/ip_sockglue.c   |  9 +++++++--
 net/unix/af_unix.c       | 17 +++++------------
 security/dummy.c         | 14 ++++++++++++--
 security/selinux/hooks.c | 38 ++++++++++++++++++++++++--------------
 7 files changed, 110 insertions(+), 44 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/linux/security.h b/include/linux/security.h
index f75303831d0..aa5b8043dc3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1109,6 +1109,16 @@ struct swap_info_struct;
  *	@name contains the name of the security module being unstacked.
  *	@ops contains a pointer to the struct security_operations of the module to unstack.
  * 
+ * @secid_to_secctx:
+ *	Convert secid to security context.
+ *	@secid contains the security ID.
+ *	@secdata contains the pointer that stores the converted security context.
+ *
+ * @release_secctx:
+ *	Release the security context.
+ *	@secdata contains the security context.
+ *	@seclen contains the length of the security context.
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1289,6 +1299,8 @@ struct security_operations {
 
  	int (*getprocattr)(struct task_struct *p, char *name, void *value, size_t size);
  	int (*setprocattr)(struct task_struct *p, char *name, void *value, size_t size);
+	int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
+	void (*release_secctx)(char *secdata, u32 seclen);
 
 #ifdef CONFIG_SECURITY_NETWORK
 	int (*unix_stream_connect) (struct socket * sock,
@@ -1317,7 +1329,7 @@ struct security_operations {
 	int (*socket_shutdown) (struct socket * sock, int how);
 	int (*socket_sock_rcv_skb) (struct sock * sk, struct sk_buff * skb);
 	int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
-	int (*socket_getpeersec_dgram) (struct sk_buff *skb, char **secdata, u32 *seclen);
+	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
 	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
@@ -2059,6 +2071,16 @@ static inline int security_netlink_recv(struct sk_buff * skb, int cap)
 	return security_ops->netlink_recv(skb, cap);
 }
 
+static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+{
+	return security_ops->secid_to_secctx(secid, secdata, seclen);
+}
+
+static inline void security_release_secctx(char *secdata, u32 seclen)
+{
+	return security_ops->release_secctx(secdata, seclen);
+}
+
 /* prototypes */
 extern int security_init	(void);
 extern int register_security	(struct security_operations *ops);
@@ -2725,6 +2747,15 @@ static inline void securityfs_remove(struct dentry *dentry)
 {
 }
 
+static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void security_release_secctx(char *secdata, u32 seclen)
+{
+	return -EOPNOTSUPP;
+}
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
@@ -2840,10 +2871,9 @@ static inline int security_socket_getpeersec_stream(struct socket *sock, char __
 	return security_ops->socket_getpeersec_stream(sock, optval, optlen, len);
 }
 
-static inline int security_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata,
-						   u32 *seclen)
+static inline int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
 {
-	return security_ops->socket_getpeersec_dgram(skb, secdata, seclen);
+	return security_ops->socket_getpeersec_dgram(sock, skb, secid);
 }
 
 static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
@@ -2968,8 +2998,7 @@ static inline int security_socket_getpeersec_stream(struct socket *sock, char __
 	return -ENOPROTOOPT;
 }
 
-static inline int security_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata,
-						   u32 *seclen)
+static inline int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
 {
 	return -ENOPROTOOPT;
 }
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2fec827c880..c0398f5a8cb 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,15 +54,13 @@ struct unix_skb_parms {
 	struct ucred		creds;		/* Skb credentials	*/
 	struct scm_fp_list	*fp;		/* Passed files		*/
 #ifdef CONFIG_SECURITY_NETWORK
-	char			*secdata;	/* Security context	*/
-	u32			seclen;		/* Security length	*/
+	u32			secid;		/* Security ID		*/
 #endif
 };
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms*)&((skb)->cb))
 #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
-#define UNIXSECDATA(skb)	(&UNIXCB((skb)).secdata)
-#define UNIXSECLEN(skb)		(&UNIXCB((skb)).seclen)
+#define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
 #define unix_state_rlock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_runlock(s)	spin_unlock(&unix_sk(s)->lock)
diff --git a/include/net/scm.h b/include/net/scm.h
index 02daa097cdc..5637d5e22d5 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -3,6 +3,7 @@
 
 #include <linux/limits.h>
 #include <linux/net.h>
+#include <linux/security.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -20,8 +21,7 @@ struct scm_cookie
 	struct ucred		creds;		/* Skb credentials	*/
 	struct scm_fp_list	*fp;		/* Passed files		*/
 #ifdef CONFIG_SECURITY_NETWORK
-	char			*secdata;	/* Security context	*/
-	u32			seclen;		/* Security length	*/
+	u32			secid;		/* Passed security ID 	*/
 #endif
 	unsigned long		seq;		/* Connection seqno	*/
 };
@@ -32,6 +32,16 @@ extern int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie
 extern void __scm_destroy(struct scm_cookie *scm);
 extern struct scm_fp_list * scm_fp_dup(struct scm_fp_list *fpl);
 
+#ifdef CONFIG_SECURITY_NETWORK
+static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm)
+{
+	security_socket_getpeersec_dgram(sock, NULL, &scm->secid);
+}
+#else
+static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm)
+{ }
+#endif /* CONFIG_SECURITY_NETWORK */
+
 static __inline__ void scm_destroy(struct scm_cookie *scm)
 {
 	if (scm && scm->fp)
@@ -47,6 +57,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 	scm->creds.pid = p->tgid;
 	scm->fp = NULL;
 	scm->seq = 0;
+	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
 		return 0;
 	return __scm_send(sock, msg, scm);
@@ -55,8 +66,18 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 #ifdef CONFIG_SECURITY_NETWORK
 static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
 {
-	if (test_bit(SOCK_PASSSEC, &sock->flags) && scm->secdata != NULL)
-		put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, scm->seclen, scm->secdata);
+	char *secdata;
+	u32 seclen;
+	int err;
+
+	if (test_bit(SOCK_PASSSEC, &sock->flags)) {
+		err = security_secid_to_secctx(scm->secid, &secdata, &seclen);
+
+		if (!err) {
+			put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata);
+			security_release_secctx(secdata, seclen);
+		}
+	}
 }
 #else
 static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 84f43a3c909..2d05c4133d3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 {
 	char *secdata;
-	u32 seclen;
+	u32 seclen, secid;
 	int err;
 
-	err = security_socket_getpeersec_dgram(skb, &secdata, &seclen);
+	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
+	if (err)
+		return;
+
+	err = security_secid_to_secctx(secid, &secdata, &seclen);
 	if (err)
 		return;
 
 	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
+	security_release_secctx(secdata, seclen);
 }
 
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6f290927926..de6ec519272 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -128,23 +128,17 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 
 #ifdef CONFIG_SECURITY_NETWORK
-static void unix_get_peersec_dgram(struct sk_buff *skb)
+static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
-	int err;
-
-	err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
-					       UNIXSECLEN(skb));
-	if (err)
-		*(UNIXSECDATA(skb)) = NULL;
+	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 }
 
 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 {
-	scm->secdata = *UNIXSECDATA(skb);
-	scm->seclen = *UNIXSECLEN(skb);
+	scm->secid = *UNIXSID(skb);
 }
 #else
-static inline void unix_get_peersec_dgram(struct sk_buff *skb)
+static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 { }
 
 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1322,8 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 	if (siocb->scm->fp)
 		unix_attach_fds(siocb->scm, skb);
-
-	unix_get_peersec_dgram(skb);
+	unix_get_secdata(siocb->scm, skb);
 
 	skb->h.raw = skb->data;
 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
diff --git a/security/dummy.c b/security/dummy.c
index bbbfda70e13..58c6d399c84 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -791,8 +791,7 @@ static int dummy_socket_getpeersec_stream(struct socket *sock, char __user *optv
 	return -ENOPROTOOPT;
 }
 
-static int dummy_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata,
-					 u32 *seclen)
+static int dummy_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
 {
 	return -ENOPROTOOPT;
 }
@@ -876,6 +875,15 @@ static int dummy_setprocattr(struct task_struct *p, char *name, void *value, siz
 	return -EINVAL;
 }
 
+static int dummy_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+{
+	return -EOPNOTSUPP;
+}
+
+static void dummy_release_secctx(char *secdata, u32 seclen)
+{
+}
+
 #ifdef CONFIG_KEYS
 static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx,
 				  unsigned long flags)
@@ -1028,6 +1036,8 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, d_instantiate);
  	set_to_dummy_if_null(ops, getprocattr);
  	set_to_dummy_if_null(ops, setprocattr);
+ 	set_to_dummy_if_null(ops, secid_to_secctx);
+ 	set_to_dummy_if_null(ops, release_secctx);
 #ifdef CONFIG_SECURITY_NETWORK
 	set_to_dummy_if_null(ops, unix_stream_connect);
 	set_to_dummy_if_null(ops, unix_may_send);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a91c961ba38..5d1b8c73319 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3524,25 +3524,21 @@ out:
 	return err;
 }
 
-static int selinux_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata, u32 *seclen)
+static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
 {
+	u32 peer_secid = SECSID_NULL;
 	int err = 0;
-	u32 peer_sid;
 
-	if (skb->sk->sk_family == PF_UNIX)
-		selinux_get_inode_sid(SOCK_INODE(skb->sk->sk_socket),
-				      &peer_sid);
-	else
-		peer_sid = selinux_socket_getpeer_dgram(skb);
-
-	if (peer_sid == SECSID_NULL)
-		return -EINVAL;
+	if (sock && (sock->sk->sk_family == PF_UNIX))
+		selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid);
+	else if (skb)
+		peer_secid = selinux_socket_getpeer_dgram(skb);
 
-	err = security_sid_to_context(peer_sid, secdata, seclen);
-	if (err)
-		return err;
+	if (peer_secid == SECSID_NULL)
+		err = -EINVAL;
+	*secid = peer_secid;
 
-	return 0;
+	return err;
 }
 
 static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
@@ -4407,6 +4403,17 @@ static int selinux_setprocattr(struct task_struct *p,
 	return size;
 }
 
+static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+{
+	return security_sid_to_context(secid, secdata, seclen);
+}
+
+static void selinux_release_secctx(char *secdata, u32 seclen)
+{
+	if (secdata)
+		kfree(secdata);
+}
+
 #ifdef CONFIG_KEYS
 
 static int selinux_key_alloc(struct key *k, struct task_struct *tsk,
@@ -4587,6 +4594,9 @@ static struct security_operations selinux_ops = {
 	.getprocattr =                  selinux_getprocattr,
 	.setprocattr =                  selinux_setprocattr,
 
+	.secid_to_secctx =		selinux_secid_to_secctx,
+	.release_secctx =		selinux_release_secctx,
+
         .unix_stream_connect =		selinux_socket_unix_stream_connect,
 	.unix_may_send =		selinux_socket_unix_may_send,
 
-- 
cgit v1.2.3-70-g09d2


From 29bbd72d6ee1dbf2d9f00d022f8e999aa528fb3a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 2 Aug 2006 15:02:31 -0700
Subject: [NET]: Fix more per-cpu typos

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netdma.h | 2 +-
 net/core/dev.c       | 4 ++--
 net/ipv4/tcp.c       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/net/netdma.h b/include/net/netdma.h
index ceae5ee85c0..7f53cd1d8b1 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -29,7 +29,7 @@ static inline struct dma_chan *get_softnet_dma(void)
 {
 	struct dma_chan *chan;
 	rcu_read_lock();
-	chan = rcu_dereference(__get_cpu_var(softnet_data.net_dma));
+	chan = rcu_dereference(__get_cpu_var(softnet_data).net_dma);
 	if (chan)
 		dma_chan_get(chan);
 	rcu_read_unlock();
diff --git a/net/core/dev.c b/net/core/dev.c
index f25d7ecaf03..d95e2626d94 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3421,7 +3421,7 @@ static void net_dma_rebalance(void)
 
 	if (net_dma_count == 0) {
 		for_each_online_cpu(cpu)
-			rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
+			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
 		return;
 	}
 
@@ -3434,7 +3434,7 @@ static void net_dma_rebalance(void)
 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
 
 		while(n) {
-			per_cpu(softnet_data.net_dma, cpu) = chan;
+			per_cpu(softnet_data, cpu).net_dma = chan;
 			cpu = next_cpu(cpu, cpu_online_map);
 			n--;
 		}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7b621e44b12..934396bb137 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	tp->ucopy.dma_chan = NULL;
 	preempt_disable();
 	if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
-	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
 		preempt_enable_no_resched();
 		tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
 	} else
-- 
cgit v1.2.3-70-g09d2


From d254bcdbf2199d9e2a52dbe4592e79ef3a456096 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 4 Aug 2006 16:57:42 -0700
Subject: [TCP]: Fixes IW > 2 cases when TCP is application limited
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whenever a transfer is application limited, we are allowed at least
initial window worth of data per window unless cwnd is previously
less than that.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 738dad9f7d4..104af5d5bcb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3541,7 +3541,8 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
 	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
 		/* Limited by application or receiver window. */
-		u32 win_used = max(tp->snd_cwnd_used, 2U);
+		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+		u32 win_used = max(tp->snd_cwnd_used, init_win);
 		if (win_used < tp->snd_cwnd) {
 			tp->snd_ssthresh = tcp_current_ssthresh(sk);
 			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
-- 
cgit v1.2.3-70-g09d2


From 8d1502de27c46b365b5c86e17d173083d3d6c9ac Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Mon, 7 Aug 2006 20:44:22 -0700
Subject: [IPV4]: Limit rt cache size properly.

From: Kirill Korotaev <dev@sw.ru>

During OpenVZ stress testing we found that UDP traffic with random src
can generate too much excessive rt hash growing leading finally to OOM
and kernel panics.

It was found that for 4GB i686 system (having 1048576 total pages and
  225280 normal zone pages) kernel allocates the following route hash:
syslog: IP route cache hash table entries: 262144 (order: 8, 1048576
bytes) => ip_rt_max_size = 4194304 entries, i.e.  max rt size is
4194304 * 256b = 1Gb of RAM > normal_zone

Attached the patch which removes HASH_HIGHMEM flag from
alloc_large_system_hash() call.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 19bd49d69d9..b873cbcdd0b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3157,7 +3157,7 @@ int __init ip_rt_init(void)
 					rhash_entries,
 					(num_physpages >= 128 * 1024) ?
 					15 : 17,
-					HASH_HIGHMEM,
+					0,
 					&rt_hash_log,
 					&rt_hash_mask,
 					0);
-- 
cgit v1.2.3-70-g09d2


From bd37a088596ccdb2b2dd3299e25e333bca7a9a34 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@nanjing-fnst.com>
Date: Mon, 7 Aug 2006 21:04:15 -0700
Subject: [TCP]: SNMPv2 tcpOutSegs counter error

Do not count retransmitted segments.

Signed-off-by: Wei Yongjun <yjwei@nanjing-fnst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5c08ea20a18..507adefbc17 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -466,7 +466,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, skb, sk);
 
-	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
+		TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (likely(err <= 0))
@@ -2157,10 +2158,9 @@ int tcp_connect(struct sock *sk)
 	skb_shinfo(buff)->gso_size = 0;
 	skb_shinfo(buff)->gso_type = 0;
 	buff->csum = 0;
+	tp->snd_nxt = tp->write_seq;
 	TCP_SKB_CB(buff)->seq = tp->write_seq++;
 	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
-	tp->snd_nxt = tp->write_seq;
-	tp->pushed_seq = tp->write_seq;
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2170,6 +2170,12 @@ int tcp_connect(struct sock *sk)
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+
+	/* We change tp->snd_nxt after the tcp_transmit_skb() call
+	 * in order to make this packet get counted in tcpOutSegs.
+	 */
+	tp->snd_nxt = tp->write_seq;
+	tp->pushed_seq = tp->write_seq;
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
-- 
cgit v1.2.3-70-g09d2


From 18b6fe64d4d1f6e0a2c71429a5e5074f43e29203 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 13 Aug 2006 18:05:09 -0700
Subject: [TCP]: Fix botched memory leak fix to tcpprobe_read().

Somehow I clobbered James's original fix and only my
subsequent compiler warning change went in for that
changeset.

Get the real fix in there.

Noticed by Jesper Juhl.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index b3435324b57..dab37d2f65f 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 	error = wait_event_interruptible(tcpw.wait,
 					 __kfifo_len(tcpw.fifo) != 0);
 	if (error)
-		return error;
+		goto out_free;
 
 	cnt = kfifo_get(tcpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt);
 
+out_free:
 	vfree(tbuf);
 
 	return error ? error : cnt;
-- 
cgit v1.2.3-70-g09d2


From 1c7628bd7a458faf7c96ef521f6d3a5ea9b106b8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 13 Aug 2006 18:06:02 -0700
Subject: [NETFILTER]: xt_hashlimit: fix limit off-by-one

Hashlimit doesn't account for the first packet, which is inconsistent
with the limit match.

Reported by ryan.castellucci@gmail.com, netfilter bugzilla #500.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_hashlimit.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 6b662449e82..3bd2368e1fc 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *skb,
 		dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * 
 							hinfo->cfg.burst);
 		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
-		spin_unlock_bh(&hinfo->lock);
-		return 1;
+	} else {
+		/* update expiration timeout */
+		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+		rateinfo_recalc(dh, now);
 	}
 
-	/* update expiration timeout */
-	dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
-	rateinfo_recalc(dh, now);
 	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 		/* We're underlimit. */
 		dh->rateinfo.credit -= dh->rateinfo.cost;
-- 
cgit v1.2.3-70-g09d2


From 0eff66e625306a794ecba4b29ed12f7a147ce219 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 13 Aug 2006 18:57:28 -0700
Subject: [NETFILTER]: {arp,ip,ip6}_tables: proper error recovery in init path

Neither of {arp,ip,ip6}_tables cleans up behind itself when something goes
wrong during initialization.

Noticed by Rennie deGraaf <degraaf@cpsc.ucalgary.ca>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 27 ++++++++++++++++++++-------
 net/ipv4/netfilter/ip_tables.c  | 33 +++++++++++++++++++++++++--------
 net/ipv6/netfilter/ip6_tables.c | 34 +++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 24 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 80c73ca9011..df4854cf598 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1170,21 +1170,34 @@ static int __init arp_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(NF_ARP);
+	ret = xt_proto_init(NF_ARP);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&arpt_standard_target);
-	xt_register_target(&arpt_error_target);
+	ret = xt_register_target(&arpt_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&arpt_error_target);
+	if (ret < 0)
+		goto err3;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&arpt_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		return ret;
-	}
+	if (ret < 0)
+		goto err4;
 
 	printk("arp_tables: (C) 2002 David S. Miller\n");
 	return 0;
+
+err4:
+	xt_unregister_target(&arpt_error_target);
+err3:
+	xt_unregister_target(&arpt_standard_target);
+err2:
+	xt_proto_fini(NF_ARP);
+err1:
+	return ret;
 }
 
 static void __exit arp_tables_fini(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fc5bdd5eb7d..f316ff5fd8a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2239,22 +2239,39 @@ static int __init ip_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET);
+	ret = xt_proto_init(AF_INET);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&ipt_standard_target);
-	xt_register_target(&ipt_error_target);
-	xt_register_match(&icmp_matchstruct);
+	ret = xt_register_target(&ipt_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&ipt_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(&icmp_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		return ret;
-	}
+	if (ret < 0)
+		goto err5;
 
 	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(&icmp_matchstruct);
+err4:
+	xt_unregister_target(&ipt_error_target);
+err3:
+	xt_unregister_target(&ipt_standard_target);
+err2:
+	xt_proto_fini(AF_INET);
+err1:
+	return ret;
 }
 
 static void __exit ip_tables_fini(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f26898b0034..c9d6b23cd3f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1398,23 +1398,39 @@ static int __init ip6_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET6);
+	ret = xt_proto_init(AF_INET6);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&ip6t_standard_target);
-	xt_register_target(&ip6t_error_target);
-	xt_register_match(&icmp6_matchstruct);
+	ret = xt_register_target(&ip6t_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&ip6t_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(&icmp6_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		xt_proto_fini(AF_INET6);
-		return ret;
-	}
+	if (ret < 0)
+		goto err5;
 
 	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(&icmp6_matchstruct);
+err4:
+	xt_unregister_target(&ip6t_error_target);
+err3:
+	xt_unregister_target(&ip6t_standard_target);
+err2:
+	xt_proto_fini(AF_INET6);
+err1:
+	return ret;
 }
 
 static void __exit ip6_tables_fini(void)
-- 
cgit v1.2.3-70-g09d2


From dcb7cd97f133f7cfbd181149a1e60215a869f895 Mon Sep 17 00:00:00 2001
From: Mark Huang <mlhuang@cs.princeton.edu>
Date: Sun, 13 Aug 2006 18:57:54 -0700
Subject: [NETFILTER]: ulog: fix panic on SMP kernels

Fix kernel panic on various SMP machines. The culprit is a null
ub->skb in ulog_send(). If ulog_timer() has already been scheduled on
one CPU and is spinning on the lock, and ipt_ulog_packet() flushes the
queue on another CPU by calling ulog_send() right before it exits,
there will be no skbuff when ulog_timer() acquires the lock and calls
ulog_send(). Cancelling the timer in ulog_send() doesn't help because
it has already been scheduled and is running on the first CPU.

Similar problem exists in ebt_ulog.c and nfnetlink_log.c.

Signed-off-by: Mark Huang <mlhuang@cs.princeton.edu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_ulog.c | 3 +++
 net/ipv4/netfilter/ipt_ULOG.c   | 5 +++++
 net/netfilter/nfnetlink_log.c   | 3 +++
 3 files changed, 11 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 02693a230dc..9f950db3b76 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -74,6 +74,9 @@ static void ulog_send(unsigned int nlgroup)
 	if (timer_pending(&ub->timer))
 		del_timer(&ub->timer);
 
+	if (!ub->skb)
+		return;
+
 	/* last nlmsg needs NLMSG_DONE */
 	if (ub->qlen > 1)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d7dd7fe7051..d46fd677fa1 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -115,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum)
 		del_timer(&ub->timer);
 	}
 
+	if (!ub->skb) {
+		DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+		return;
+	}
+
 	/* last nlmsg needs NLMSG_DONE */
 	if (ub->qlen > 1)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 61cdda4e5d3..b59d3b2bde2 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst)
 	if (timer_pending(&inst->timer))
 		del_timer(&inst->timer);
 
+	if (!inst->skb)
+		return 0;
+
 	if (inst->qlen > 1)
 		inst->lastnlh->nlmsg_type = NLMSG_DONE;
 
-- 
cgit v1.2.3-70-g09d2


From e9fa4f7bd291c29a785666e2fa5a9cf3241ee6c3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 20:12:58 -0700
Subject: [INET]: Use pskb_trim_unique when trimming paged unique skbs

The IPv4/IPv6 datagram output path was using skb_trim to trim paged
packets because they know that the packet has not been cloned yet
(since the packet hasn't been given to anything else in the system).

This broke because skb_trim no longer allows paged packets to be
trimmed.  Paged packets must be given to one of the pskb_trim functions
instead.

This patch adds a new pskb_trim_unique function to cover the IPv4/IPv6
datagram output path scenario and replaces the corresponding skb_trim
calls with it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 net/ipv4/ip_output.c   |  4 ++--
 net/ipv6/ip6_output.c  |  2 +-
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3573ba9a255..755e9cddac4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1039,6 +1039,21 @@ static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
 	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
 }
 
+/**
+ *	pskb_trim_unique - remove end from a paged unique (not cloned) buffer
+ *	@skb: buffer to alter
+ *	@len: new length
+ *
+ *	This is identical to pskb_trim except that the caller knows that
+ *	the skb is not cloned so we should never get an error due to out-
+ *	of-memory.
+ */
+static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
+{
+	int err = pskb_trim(skb, len);
+	BUG_ON(err);
+}
+
 /**
  *	skb_orphan - orphan a buffer
  *	@skb: buffer to orphan
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9bf307a2978..4c20f554689 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -947,7 +947,7 @@ alloc_new_skb:
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				data += fraggap;
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 
 			copy = datalen - transhdrlen - fraggap;
@@ -1142,7 +1142,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 					data, fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 
 			/*
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 69451af6abe..4fb47a25291 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1095,7 +1095,7 @@ alloc_new_skb:
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				data += fraggap;
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 			copy = datalen - transhdrlen - fraggap;
 			if (copy < 0) {
-- 
cgit v1.2.3-70-g09d2


From bb699cbca0096aa3f5f750264ec0af080732375a Mon Sep 17 00:00:00 2001
From: Michal Ruzicka <michal.ruzicka@comstar.cz>
Date: Tue, 15 Aug 2006 00:20:17 -0700
Subject: [IPV4]: Possible leak of multicast source filter sctructure

There is a leak of a socket's multicast source filter list structure
on closing a socket with a multicast source filter set on an interface
that does not exist any more.

Signed-off-by: Michal Ruzicka <michal.ruzicka@comstar.cz>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9f4b752f5a3..e981369ebe1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2199,13 +2199,13 @@ void ip_mc_drop_socket(struct sock *sk)
 		struct in_device *in_dev;
 		inet->mc_list = iml->next;
 
-		if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) {
-			(void) ip_mc_leave_src(sk, iml, in_dev);
+		in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+		(void) ip_mc_leave_src(sk, iml, in_dev);
+		if (in_dev != NULL) {
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
 			in_dev_put(in_dev);
 		}
 		sock_kfree_s(sk, iml, sizeof(*iml));
-
 	}
 	rtnl_unlock();
 }
-- 
cgit v1.2.3-70-g09d2


From acd6e00b8e4db542cb6bc9ddfbb4e18bbe29ce4d Mon Sep 17 00:00:00 2001
From: David L Stevens <dlstevens@us.ibm.com>
Date: Thu, 17 Aug 2006 16:27:39 -0700
Subject: [MCAST]: Fix filter leak on device removal.

This fixes source filter leakage when a device is removed and a
process leaves the group thereafter.

This also includes corresponding fixes for IPv6 multicast source
filters on device removal.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c  | 32 +++++++++++++++++++-------------
 net/ipv6/mcast.c | 10 ++++++----
 2 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e981369ebe1..8e8117c19e4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1793,29 +1793,35 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	struct in_device *in_dev;
 	u32 group = imr->imr_multiaddr.s_addr;
 	u32 ifindex;
+	int ret = -EADDRNOTAVAIL;
 
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(imr);
-	if (!in_dev) {
-		rtnl_unlock();
-		return -ENODEV;
-	}
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
-		if (iml->multi.imr_multiaddr.s_addr == group &&
-		    iml->multi.imr_ifindex == ifindex) {
-			(void) ip_mc_leave_src(sk, iml, in_dev);
+		if (iml->multi.imr_multiaddr.s_addr != group)
+			continue;
+		if (ifindex) {
+			if (iml->multi.imr_ifindex != ifindex)
+				continue;
+		} else if (imr->imr_address.s_addr && imr->imr_address.s_addr !=
+				iml->multi.imr_address.s_addr)
+			continue;
 
-			*imlp = iml->next;
+		(void) ip_mc_leave_src(sk, iml, in_dev);
 
+		*imlp = iml->next;
+
+		if (in_dev)
 			ip_mc_dec_group(in_dev, group);
-			rtnl_unlock();
-			sock_kfree_s(sk, iml, sizeof(*iml));
-			return 0;
-		}
+		rtnl_unlock();
+		sock_kfree_s(sk, iml, sizeof(*iml));
+		return 0;
 	}
+	if (!in_dev)
+		ret = -ENODEV;
 	rtnl_unlock();
-	return -EADDRNOTAVAIL;
+	return ret;
 }
 
 int ip_mc_source(int add, int omode, struct sock *sk, struct
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9d697d4dcff..639eb20c9f1 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -268,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 			if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
 				struct inet6_dev *idev = in6_dev_get(dev);
 
+				(void) ip6_mc_leave_src(sk, mc_lst, idev);
 				if (idev) {
-					(void) ip6_mc_leave_src(sk,mc_lst,idev);
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
 					in6_dev_put(idev);
 				}
 				dev_put(dev);
-			}
+			} else
+				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 			return 0;
 		}
@@ -334,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk)
 		if (dev) {
 			struct inet6_dev *idev = in6_dev_get(dev);
 
+			(void) ip6_mc_leave_src(sk, mc_lst, idev);
 			if (idev) {
-				(void) ip6_mc_leave_src(sk, mc_lst, idev);
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
 				in6_dev_put(idev);
 			}
 			dev_put(dev);
-		}
+		} else
+			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
-- 
cgit v1.2.3-70-g09d2


From 6e8fcbf64024f9056ba122abbb66554aa76bae5d Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Thu, 17 Aug 2006 16:44:46 -0700
Subject: [IPV4]: severe locking bug in fib_semantics.c

Found in 2.4 by Yixin Pan <yxpan@hotmail.com>.

> When I read fib_semantics.c of Linux-2.4.32, write_lock(&fib_info_lock) =
> is used in fib_release_info() instead of write_lock_bh(&fib_info_lock).  =
> Is the following case possible: a BH interrupts fib_release_info() while =
> holding the write lock, and calls ip_check_fib_default() which calls =
> read_lock(&fib_info_lock), and spin forever.

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9be53a8e72c..51738000f3d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi)
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
@@ -820,7 +820,7 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,7 +839,7 @@ link_it:
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 	return fi;
 
 err_inval:
-- 
cgit v1.2.3-70-g09d2


From d205dc40798d97d63ad348bfaf7394f445d152d4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Aug 2006 18:12:38 -0700
Subject: [NETFILTER]: ctnetlink: fix deadlock in table dumping

ip_conntrack_put must not be called while holding ip_conntrack_lock
since destroy_conntrack takes it again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 17 +++++++----------
 net/netfilter/nf_conntrack_netlink.c      | 17 +++++++----------
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 33891bb1fde..0d4cc92391f 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->args[0], *id);
 
 	read_lock_bh(&ip_conntrack_lock);
+	last = (struct ip_conntrack *)cb->args[1];
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
 restart:
-		last = (struct ip_conntrack *)cb->args[1];
 		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = tuplehash_to_ctrack(h);
-			if (last != NULL) {
-				if (ct == last) {
-					ip_conntrack_put(last);
-					cb->args[1] = 0;
-					last = NULL;
-				} else
+			if (cb->args[1]) {
+				if (ct != last)
 					continue;
+				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 		                        	cb->nlh->nlmsg_seq,
@@ -440,17 +437,17 @@ restart:
 				goto out;
 			}
 		}
-		if (last != NULL) {
-			ip_conntrack_put(last);
+		if (cb->args[1]) {
 			cb->args[1] = 0;
 			goto restart;
 		}
 	}
 out:
 	read_unlock_bh(&ip_conntrack_lock);
+	if (last)
+		ip_conntrack_put(last);
 
 	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
 	return skb->len;
 }
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index af4845971f7..6527d4e048d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -429,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->args[0], *id);
 
 	read_lock_bh(&nf_conntrack_lock);
+	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		last = (struct nf_conn *)cb->args[1];
 		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
 			h = (struct nf_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -442,13 +442,10 @@ restart:
 			 * then dump everything. */
 			if (l3proto && L3PROTO(ct) != l3proto)
 				continue;
-			if (last != NULL) {
-				if (ct == last) {
-					nf_ct_put(last);
-					cb->args[1] = 0;
-					last = NULL;
-				} else
+			if (cb->args[1]) {
+				if (ct != last)
 					continue;
+				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 		                        	cb->nlh->nlmsg_seq,
@@ -459,17 +456,17 @@ restart:
 				goto out;
 			}
 		}
-		if (last != NULL) {
-			nf_ct_put(last);
+		if (cb->args[1]) {
 			cb->args[1] = 0;
 			goto restart;
 		}
 	}
 out:
 	read_unlock_bh(&nf_conntrack_lock);
+	if (last)
+		nf_ct_put(last);
 
 	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
 	return skb->len;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8311731afc439f508ab4d759edadedae75afb73e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Aug 2006 18:13:53 -0700
Subject: [NETFILTER]: ip_tables: fix table locking in ipt_do_table

table->private might change because of ruleset changes, don't use it without
holding the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f316ff5fd8a..048514f15f2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -230,7 +230,7 @@ ipt_do_table(struct sk_buff **pskb,
 	const char *indev, *outdev;
 	void *table_base;
 	struct ipt_entry *e, *back;
-	struct xt_table_info *private = table->private;
+	struct xt_table_info *private;
 
 	/* Initialization */
 	ip = (*pskb)->nh.iph;
@@ -247,6 +247,7 @@ ipt_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-- 
cgit v1.2.3-70-g09d2


From e0b7cde9975e17a61b4511c7822803dfb7210011 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Aug 2006 15:31:08 -0700
Subject: [NETFILTER]: arp_tables: fix table locking in arpt_do_table

table->private might change because of ruleset changes, don't use it
without holding the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index df4854cf598..8d1d7a6e72a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
-	struct xt_table_info *private = table->private;
+	struct xt_table_info *private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
-- 
cgit v1.2.3-70-g09d2


From 316c1592bea94ead75301cb764523661fbbcc1ca Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 22 Aug 2006 00:06:11 -0700
Subject: [TCP]: Limit window scaling if window is clamped.

This small change allows for easy per-route workarounds for broken hosts or
middleboxes that are not compliant with TCP standards for window scaling.
Rather than having to turn off window scaling globally. This patch allows
reducing or disabling window scaling if window clamp is present.

Example: Mark Lord reported a problem with 2.6.17 kernel being unable to
access http://www.everymac.com

# ip route add 216.145.246.23/32 via 10.8.0.1 window 65535

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 507adefbc17..b4f3ffe1b3b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		 * See RFC1323 for an explanation of the limit to 14 
 		 */
 		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+		space = min_t(u32, space, *window_clamp);
 		while (space > 65535 && (*rcv_wscale) < 14) {
 			space >>= 1;
 			(*rcv_wscale)++;
-- 
cgit v1.2.3-70-g09d2


From 3fdf3f0c99e90e167f0d0643fcc8739e27456697 Mon Sep 17 00:00:00 2001
From: Daikichi Osuga <osugad@s1.nttdocomo.co.jp>
Date: Tue, 29 Aug 2006 02:01:44 -0700
Subject: [TCP]: Two RFC3465 Appropriate Byte Count fixes.

1) fix slow start after retransmit timeout
2) fix case of L=2*SMSS acked bytes comparison

Signed-off-by: Daikichi Osuga <osugad@s1.nttdocomo.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c  | 2 +-
 net/ipv4/tcp_input.c | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 5765f9d0317..7ff2e4273a7 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,7 +189,7 @@ void tcp_slow_start(struct tcp_sock *tp)
 			return;
 
 		/* We MAY increase by 2 if discovered delayed ack */
-		if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+		if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
 				tp->snd_cwnd++;
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 104af5d5bcb..111ff39a08c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2505,8 +2505,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	if (before(ack, prior_snd_una))
 		goto old_ack;
 
-	if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
-		tp->bytes_acked += ack - prior_snd_una;
+	if (sysctl_tcp_abc) {
+		if (icsk->icsk_ca_state < TCP_CA_CWR)
+			tp->bytes_acked += ack - prior_snd_una;
+		else if (icsk->icsk_ca_state == TCP_CA_Loss)
+			/* we assume just one segment left network */
+			tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+	}
 
 	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
-- 
cgit v1.2.3-70-g09d2


From 0668b47205e42c04e9c1b594573be5a822ac7f09 Mon Sep 17 00:00:00 2001
From: Wei Dong <weid@nanjing-fnst.com>
Date: Thu, 31 Aug 2006 15:24:48 -0700
Subject: [IPV4]: Fix SNMPv2 "ipFragFails" counter error

  When I tested Linux kernel 2.6.17.7 about statistics
"ipFragFails",found that this counter couldn't increase correctly. The
criteria is RFC2011:
RFC2011
  ipFragFails OBJECT-TYPE
    SYNTAX      Counter32
    MAX-ACCESS  read-only
    STATUS      current
    DESCRIPTION
            "The number of IP datagrams that have been discarded because
            they needed to be fragmented at this entity but could not
            be, e.g., because their Don't Fragment flag was set."
    ::= { ip 18 }

When I send big IP packet to a router with DF bit set to 1 which need to
be fragmented, and router just sends an ICMP error message
ICMP_FRAG_NEEDED but no increments for this counter(in the function
ip_fragment).

Signed-off-by: Wei Dong <weid@nanjing-fnst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4c20f554689..a2ede167e04 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -440,6 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	iph = skb->nh.iph;
 
 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(dst_mtu(&rt->u.dst)));
 		kfree_skb(skb);
-- 
cgit v1.2.3-70-g09d2